1 |
146 |
Agner |
% chapter included in forwardcom.tex
|
2 |
|
|
\documentclass[forwardcom.tex]{subfiles}
|
3 |
|
|
\begin{document}
|
4 |
|
|
\RaggedRight
|
5 |
|
|
|
6 |
|
|
\chapter{Instruction lists}\label{chap:InstructionLists}
|
7 |
|
|
The ForwardCom instructions are listed in a comma-separated file instruction\_list.csv. This file is intended for use by assemblers, disassemblers, debuggers and emulators. The list is preliminary and subject to possible changes. Please remember to keep the lists in this document and the list in the instruction\_list.csv file synchronized.
|
8 |
|
|
\vv
|
9 |
|
|
|
10 |
|
|
The instruction list file has the following fields:
|
11 |
|
|
|
12 |
|
|
\begin{longtable} {|p{18mm}|p{100mm}|}
|
13 |
|
|
\caption{Fields in instruction list file}
|
14 |
|
|
\label{table:fieldsInInstructionListFile}
|
15 |
|
|
\\
|
16 |
|
|
\endfirsthead
|
17 |
|
|
\endhead
|
18 |
|
|
\hline
|
19 |
|
|
\bfseries Field & \bfseries Meaning \\
|
20 |
|
|
\hline
|
21 |
|
|
Name & Name of instruction as used by assembler. \\
|
22 |
|
|
\hline
|
23 |
|
|
Category & 1: single format instruction, \newline
|
24 |
|
|
2: unused, \newline
|
25 |
|
|
3: multi-format instruction, \newline
|
26 |
|
|
4: jump instruction. \\
|
27 |
|
|
\hline
|
28 |
|
|
Formats & See table \ref{table:MeaningOfFormatsFieldInInstructionListFile} below. \\
|
29 |
|
|
\hline
|
30 |
|
|
Template & Hexadecimal number: \newline
|
31 |
|
|
0xA - 0xE for template A - E, \newline
|
32 |
|
|
0x0 for multiple templates. \\
|
33 |
|
|
\hline
|
34 |
|
|
Variant &
|
35 |
|
|
D0: No destination operand, no operand type.\newline
|
36 |
|
|
D1: No destination operand, but operand type specified.\newline
|
37 |
|
|
D2: Operand type ignored.\newline
|
38 |
|
|
D3: Destination register used for other purpose.\newline
|
39 |
|
|
F0: Can have mask register, but not fallback register.\newline
|
40 |
|
|
F1: Can have fallback register without mask register.\newline
|
41 |
|
|
I2: Immediate source operand is integer regardless of specified operand type.\newline
|
42 |
|
|
M0: Memory operand is destination.\newline
|
43 |
|
|
%M1: E formats with a memory operand use IM3 as an extra immediate operand.\newline (obsolete)
|
44 |
|
|
On: n bits of IM3 in E template format used for options (IM3 can be used for shift count only if it is not used for options).\newline
|
45 |
|
|
R0: Destination is a general purpose register.\newline
|
46 |
|
|
R1: First source operand is a general purpose register.\newline
|
47 |
|
|
R2: Second source operand is a general purpose register.\newline
|
48 |
|
|
RL: RT is a general purpose register specifying vector length.\newline
|
49 |
|
|
U0: Integer operands are unsigned.\newline
|
50 |
|
|
U3: Integer operands are unsigned if option bit 3 is set.\newline \hspace{6mm}
|
51 |
|
|
(compare instruction).\newline
|
52 |
|
|
H0: Half precision floating point instruction.\newline
|
53 |
|
|
X0: Source register can be a special pointer (threadp, datap, ip).\newline
|
54 |
|
|
X1: Source register is special register.\newline
|
55 |
|
|
X2: Source register is capabilities register.\newline
|
56 |
|
|
X3: Source register is performance monitor register.\newline
|
57 |
|
|
X4: Source register is system register.\newline
|
58 |
|
|
Y0-4: Destination register is one of the above.
|
59 |
|
|
\\ \hline
|
60 |
|
|
Source operands & Number of source operands, including register, memory and immediate operands, but not including mask, option bits, vector length, and index. \\
|
61 |
|
|
\hline
|
62 |
|
|
OP1 & Operation code OP1. \\
|
63 |
|
|
\hline
|
64 |
|
|
OP2 & Additional operation code OP2. Zero if none. \\
|
65 |
|
|
\hline
|
66 |
|
|
Operand types general purpose registers & Hexadecimal number indicating required and optional support for each operand type with general purpose registers. See table \ref{table:OperandTypesInInstructionList} below for meaning of each bit. \\
|
67 |
|
|
\hline
|
68 |
|
|
Operand types scalar & Hexadecimal number indicating required and optional support for each operand type for scalar operations in vector registers. See table \ref{table:OperandTypesInInstructionList} below for meaning of each bit. \\
|
69 |
|
|
\hline
|
70 |
|
|
Operand types vector & Hexadecimal number indicating required and optional support for each operand type for vector operations. See table \ref{table:OperandTypesInInstructionList} below for meaning of each bit. \\
|
71 |
|
|
\hline
|
72 |
|
|
Immediate operand type & Type of immediate operand for single-format instructions. See table \ref{table:immediateOperantTypesInInstructionList} below. \\
|
73 |
|
|
\hline
|
74 |
|
|
Description & Description of the instruction and comments. \\
|
75 |
|
|
\hline
|
76 |
|
|
\end{longtable}
|
77 |
|
|
|
78 |
|
|
\pagebreak % The text in the multirow box below disappears if there is a page break in it.
|
79 |
|
|
% Put page break here instead to prevent this
|
80 |
|
|
\label{table_format_field_in_list}
|
81 |
|
|
\begin{longtable} {|p{18mm}|p{20mm} p{80mm}|}
|
82 |
|
|
\caption{Meaning of formats field in instruction list file}
|
83 |
|
|
\label{table:MeaningOfFormatsFieldInInstructionListFile}
|
84 |
|
|
\\
|
85 |
|
|
\endfirsthead
|
86 |
|
|
\endhead
|
87 |
|
|
\hline
|
88 |
|
|
\bfseries Category & \multicolumn{2}{|l|}{\bfseries Interpretation of formats field} \\
|
89 |
|
|
\hline
|
90 |
|
|
1. Single format instruction & \multicolumn{2}{|p{102mm}|}{
|
91 |
|
|
Number with three hexadecimal digits. \newline
|
92 |
|
|
The leftmost digit is the value of the IL field (0-3). \newline
|
93 |
|
|
The middle digit is the value of the mode field or the combined M+mode field (0-9).\newline
|
94 |
|
|
The rightmost digit is the sub-mode defined by OP2 in E template modes or OP1 in mode 2.5.x. Zero otherwise. \newline
|
95 |
|
|
For example 0x223 means format 2.2.3.
|
96 |
|
|
} \\
|
97 |
|
|
\hline
|
98 |
|
|
% \pagebreak % The text in the multirow box disappears if there is a page break in it.
|
99 |
|
|
% Put pagebreak here to prevent this, or before the table
|
100 |
|
|
% \hline
|
101 |
|
|
\multirow{27}{*}{\parbox[t]{18mm}{3. Multi-format instruction}}
|
102 |
|
|
& \multicolumn{2}{|l|}{
|
103 |
|
|
Hexadecimal number composed of one bit for each format supported:} \\
|
104 |
|
|
& 0x0000001 & Format 0.0: three general purpose registers. \\
|
105 |
|
|
& 0x0000002 & Format 0.1: two general purpose registers, 8-bit immediate. \\
|
106 |
|
|
& 0x0000004 & Format 0.2: Three vector registers. \\
|
107 |
|
|
& 0x0000008 & Format 0.3: Two vectors, 8-bit immediate. \\
|
108 |
|
|
& 0x0000010 & Format 0.4: One vector, memory operand. \\
|
109 |
|
|
& 0x0000020 & Format 0.5: One vector, memory operand with negative index. \\
|
110 |
|
|
& 0x0000040 & Format 0.6: One vector, scalar memory operand with index. \\
|
111 |
|
|
& 0x0000080 & Format 0.7: One vector, scalar memory operand with 8-bit offset. \\
|
112 |
|
|
& 0x0000100 & Format 0.8: One g. p. register, memory operand with index. \\
|
113 |
|
|
& 0x0000200 & Format 0.9: One g. p. register, memory operand with 8-bit offset. \\
|
114 |
|
|
|
115 |
|
|
& 0x0001000 & Format 2.8: Three g. p. registers, 32-bit immediate. \\
|
116 |
|
|
& 0x0002000 & Format 2.1: Two g. p. registers, memory with 32-bit offset. \\
|
117 |
|
|
& 0x0004000 & Format 2.3: Three vector registers, 32-bit immediate. \\
|
118 |
|
|
& 0x0008000 & Format 2.4: One vector register, memory with 32-bit offset. \\
|
119 |
|
|
|
120 |
|
|
& 0x0010000 & Format 2.0.0: Three g. p. reg., memory with 16-bit offset. \\
|
121 |
|
|
& 0x0020000 & Format 2.0.1: Two g. p. reg., memory with unscaled index. \\
|
122 |
|
|
& 0x0040000 & Format 2.0.2: Two g. p. reg., memory with scaled index. \\
|
123 |
|
|
& 0x0080000 & Format 2.0.3: Two g. p. reg., memory with index and limit.\\
|
124 |
|
|
& 0x0400000 & Format 2.0.6: Four g. p. reg.\\
|
125 |
|
|
& 0x0800000 & Format 2.0.7: Three g. p. registers, 16-bit shifted immediate. \\
|
126 |
|
|
|
127 |
|
|
& 0x1000000 & Format 2.2.0: Two vector reg., scalar memory w. 16-bit offset. \\
|
128 |
|
|
& 0x2000000 & Format 2.2.1: Two vector reg., memory with 16-bit offset. \\
|
129 |
|
|
& 0x4000000 & Format 2.2.2: Two vector reg., memory with negative index. \\
|
130 |
|
|
& 0x8000000 & Format 2.2.3: Two vector reg., scalar memory w. index and limit. \\
|
131 |
|
|
& 0x40000000 & Format 2.2.6: Four vector reg. \\
|
132 |
|
|
& 0x80000000 & Format 2.2.7: Three vector registers, 16-bit shifted immediate.\\
|
133 |
|
|
|
134 |
|
|
& 0x100000000 & Format 3.8: Three g. p. registers, 64-bit immediate. \\
|
135 |
|
|
& 0x40000 0000 & Format 3.3: Three vector registers, 64-bit immediate. \\
|
136 |
|
|
|
137 |
|
|
& 0x100000 0000 & Format 3.0.0: Three g. p. reg., memory with 32-bit offset. \\
|
138 |
|
|
& 0x800000 0000 & Format 3.0.3: Two g. p. reg., memory with index and 32-bit limit.\\
|
139 |
|
|
& 0x2000000 0000 & Format 3.0.5: One g. p. reg., memory with index and 16-bit offset, 32-bit immediate.\\
|
140 |
|
|
& 0x8000000 0000 & Format 3.0.7: Three g. p. registers, 32-bit shifted immediate. \\
|
141 |
|
|
|
142 |
|
|
& 0x10000000 0000 & Format 3.2.0: Two vector reg., scalar memory w. 32-bit offset. \\
|
143 |
|
|
& 0x20000000 0000 & Format 3.2.1: Two vector reg., memory with 32-bit offset. \\
|
144 |
|
|
& 0x80000000 0000 & Format 3.2.3: Two vector reg., scalar memory w. index and 32-bit limit. \\
|
145 |
|
|
& 0x200000000 0000 & Format 3.2.5: One vector reg., memory with 16-bit offset, and 32-bit immediate. \\
|
146 |
|
|
& 0x800000000 0000 & Format 3.2.7: Three vector registers, float or 32-bit shifted immediate.\\
|
147 |
|
|
|
148 |
|
|
\hline
|
149 |
|
|
|
150 |
|
|
\multirow{12}{*}{\parbox[t]{18mm}{4. Jump instruction}}
|
151 |
|
|
& \multicolumn{2}{|l|}{
|
152 |
|
|
Hexadecimal number composed of one bit for each format supported:} \\
|
153 |
|
|
& 0x00001 & Format 1.6.0 B: Two registers, 8 bit offset. \\
|
154 |
|
|
& 0x00002 & Format 1.7.1 C: One register, 8 bit immediate, 8 bit offset. \\
|
155 |
|
|
& 0x00010 & Format 2.5.0 A: Three registers, 24 bit offset. \\
|
156 |
|
|
& 0x00020 & Format 2.5.1 B: Two registers, 16 bit immediate, 16 bit offset. \\
|
157 |
|
|
& 0x00040 & Format 2.5.2 B: One register, memory operand with 16 bit address, 16 bit offset. \\
|
158 |
|
|
& 0x00080 & Format 2.5.3 B: Unused. \\
|
159 |
|
|
& 0x00100 & Format 2.5.4 C: One register, 8 bit immediate, 32 bit offset. \\
|
160 |
|
|
& 0x00200 & Format 2.5.5 C: One register, 32 bit immediate, 8 bit offset. \\
|
161 |
|
|
& 0x01000 & Format 3.1.0 A: Two registers, memory operand w. 32 bit address, 24 bit offset. \\
|
162 |
|
|
& 0x02000 & Format 3.1.1 B: Two registers, 32 bit immediate, 32 bit offset. \\
|
163 |
|
|
& 0x10000 & Format 1.6.1 B: Memory operand with 8 bit offset. \\
|
164 |
|
|
& 0x20000 & Format 1.6.2 A: Reg. and memory w. scaled index. \\
|
165 |
|
|
& 0x40000 & Format 1.6.3 A: Three registers. \\
|
166 |
|
|
& 0x100000 & Format 1.7.0 D: No register, 24 bit address. \\
|
167 |
|
|
% & 0x200000 & Format 1.7.2 C: 16-bit offset. Unused \\
|
168 |
|
|
& 0x400000 & Format 1.7.3 C: One register. \\
|
169 |
|
|
& 0x800000 & Format 1.7.4 C: 16 bit immediate. \\
|
170 |
|
|
& 0x1000000 & Format 1.7.5 C: 16 bit fixed immediate. \\
|
171 |
|
|
& 0x2000000 & Format 1.7.A C: Format 1.7 with 64 bit operand size. \\
|
172 |
|
|
& 0x10000000 & Format 2.5.1 X: Two registers, 2x16 bit immediate. \\
|
173 |
|
|
& 0x20000000 & Format 2.5.2 X: One register, memory operand with 32 bit offset. \\
|
174 |
|
|
& 0x40000000 & Format 2.5.4 X: 64 bit operand size. \\
|
175 |
|
|
& 0x80000000 & Format 2.5.5 X: Conditional trap. \\
|
176 |
|
|
& 0x100000000 & Format 2.5.7 C: System call, 16 bit function, 32 bit module. \\
|
177 |
|
|
& 0x1000000 0000 & Format 3.1.1 X: System call, 32 bit function, 32 bit module. \\
|
178 |
|
|
\hline
|
179 |
|
|
\end{longtable}
|
180 |
|
|
|
181 |
|
|
\begin{longtable} {|p{18mm}|p{100mm}|}
|
182 |
|
|
\caption{
|
183 |
|
|
Indication of operand types supported for general purpose registers, scalars in vector
|
184 |
|
|
registers, or vectors. The value is a hexadecimal number composed of one bit for each operand
|
185 |
|
|
type supported}
|
186 |
|
|
\label{table:OperandTypesInInstructionList} \\
|
187 |
|
|
\endfirsthead
|
188 |
|
|
\endhead
|
189 |
|
|
\hline
|
190 |
|
|
0x0001 & 8-bit integer supported. \\
|
191 |
|
|
0x0002 & 16-bit integer supported. \\
|
192 |
|
|
0x0004 & 32-bit integer supported. \\
|
193 |
|
|
0x0008 & 64-bit integer supported. \\
|
194 |
|
|
0x0010 & 128-bit integer supported. \\
|
195 |
|
|
0x0020 & single precision floating point supported. \\
|
196 |
|
|
0x0040 & double precision floating point supported. \\
|
197 |
|
|
0x0080 & quadruple precision floating point supported. \\
|
198 |
|
|
0x0100 & 8-bit integer optionally supported. \\
|
199 |
|
|
0x0200 & 16-bit integer optionally supported. \\
|
200 |
|
|
0x0400 & 32-bit integer optionally supported. \\
|
201 |
|
|
0x0800 & 64-bit integer optionally supported. \\
|
202 |
|
|
0x1000 & 128-bit integer optionally supported. \\
|
203 |
|
|
0x2000 & single precision floating point optionally supported. \\
|
204 |
|
|
0x4000 & double precision floating point optionally supported. \\
|
205 |
|
|
0x8000 & quadruple precision floating point optionally supported. \\
|
206 |
|
|
\hline
|
207 |
|
|
\end{longtable}
|
208 |
|
|
|
209 |
|
|
\begin{longtable} {|p{18mm}|p{100mm}|}
|
210 |
|
|
\caption{
|
211 |
|
|
Immediate operand type for single-format instructions}
|
212 |
|
|
\label{table:immediateOperantTypesInInstructionList}
|
213 |
|
|
\\
|
214 |
|
|
\endfirsthead
|
215 |
|
|
\endhead
|
216 |
|
|
\hline
|
217 |
|
|
|
218 |
|
|
% 1 & 4-bit signed integer. \\
|
219 |
|
|
2 & 8-bit signed integer. \\
|
220 |
|
|
3 & 16-bit signed integer. \\
|
221 |
|
|
4 & 32-bit signed integer. \\
|
222 |
|
|
5 & 64-bit signed integer. \\
|
223 |
|
|
6 & 8-bit signed integer shifted by specified count. \\
|
224 |
|
|
7 & 16-bit signed integer shifted by specified count. \\
|
225 |
|
|
8 & 16-bit signed integer shifted by 16. \\
|
226 |
|
|
9 & 32-bit signed integer shifted by 32. \\
|
227 |
|
|
% 17 & 4-bit unsigned integer. \\
|
228 |
|
|
18 & 8-bit unsigned integer. \\
|
229 |
|
|
19 & 16-bit unsigned integer. \\
|
230 |
|
|
20 & 32-bit unsigned integer. \\
|
231 |
|
|
21 & 64-bit unsigned integer. \\
|
232 |
|
|
24 & two 8-bit unsigned integers. \\
|
233 |
|
|
25 & two 8-bit and one 6-bit unsigned integers. \\
|
234 |
|
|
26 & two 16-bit unsigned integers. \\
|
235 |
|
|
27 & one 16-bit and one 32-bit unsigned integer. \\
|
236 |
|
|
28 & two 32-bit unsigned integers. \\
|
237 |
|
|
29 & one 16-bit and two 8-bit unsigned integers. \\
|
238 |
|
|
% 33 & 4-bit unsigned integer converted to float. \\
|
239 |
|
|
34 & 8-bit signed integer converted to float. \\
|
240 |
|
|
35 & 16-bit signed integer converted to float. \\
|
241 |
|
|
64 & half precision floating point. \\
|
242 |
|
|
65 & single precision floating point. \\
|
243 |
|
|
66 & double precision floating point. \\
|
244 |
|
|
100 & determined by operand type. \\
|
245 |
|
|
in & a number prefixed by 'i' indicates an implicit value.
|
246 |
|
|
The implicit immediate operand with this value does not need to be written in the assembly code. \\
|
247 |
|
|
\hline
|
248 |
|
|
\end{longtable}
|
249 |
|
|
|
250 |
|
|
Jump instructions are listed on page \pageref{table:controlTransferInstructions}. All other categories of instructions are listed in the following tables.
|
251 |
|
|
|
252 |
|
|
|
253 |
|
|
\section{List of multi-format instructions}
|
254 |
|
|
The following list covers general instructions that can be coded in most or all of the formats
|
255 |
|
|
assigned to multi-format instructions.
|
256 |
|
|
|
257 |
|
|
\begin{longtable} {|p{25mm}|p{12mm}|p{12mm}|p{100mm}|}
|
258 |
|
|
\caption{
|
259 |
|
|
List of multi-format instructions}
|
260 |
|
|
\label{table:ListOfMultiFormatInstructions} \\
|
261 |
|
|
\endfirsthead
|
262 |
|
|
\endhead
|
263 |
|
|
\hline
|
264 |
|
|
\bfseries Instruction & \bfseries OP1 & \bfseries Source ope-rands & \bfseries Description \\
|
265 |
|
|
\hline
|
266 |
|
|
nop & 0 & 0 & No operation. \\
|
267 |
|
|
store & 1 & 1 & Store value to memory. \\
|
268 |
|
|
move & 2 & 1 & Copy value. \\
|
269 |
|
|
prefetch & 3 & 1 & Prefetch from memory. \\
|
270 |
|
|
sign\_extend & 4 & 1 & Sign-extend smaller integer to 64 bits. \\
|
271 |
|
|
sign\_extend\_ add & 5 & 2 & Sign-extend smaller integer to 64 bits and add 64-bit register. \\
|
272 |
|
|
compare & 7 & 2 & Compare. Uses condition codes, see p. \pageref{table:conditionCodesForCompareInstruction}. \\
|
273 |
|
|
add & 8 & 2 & src1 + src2. \\
|
274 |
|
|
sub & 9 & 2 & src1 - src2. \\
|
275 |
|
|
sub\_rev & 10 & 2 & src2 - src1. \\
|
276 |
|
|
mul & 11 & 2 & src1 $\cdot$ src2. \\
|
277 |
|
|
mul\_hi & 12 & 2 & (src1 $\cdot$ src2) $>>$ OS, signed (integer only). \\
|
278 |
|
|
mul\_hi\_u & 13 & 2 & (src1 $\cdot$ src2) $>>$ OS, unsigned (integer only). \\
|
279 |
|
|
div & 14 & 2 & src1 / src2, signed division (optional for integer vectors). \\
|
280 |
|
|
div\_u & 15 & 2 & src1 / src2, unsigned integer division (optional for vectors). \\
|
281 |
|
|
div\_rev & 16 & 2 & src2 / src1, signed division (optional for integer vectors). \\
|
282 |
|
|
rem & 18 & 2 & Modulo or remainder, signed (optional for integer vectors). \\
|
283 |
|
|
rem\_u & 19 & 2 & Modulo or remainder, unsigned (optional for integer vectors). \\
|
284 |
|
|
min & 20 & 2 & Signed minimum. \\
|
285 |
|
|
min\_u & 21 & 2 & Minimum. Unsigned for integers, abs for f.p. \\
|
286 |
|
|
max & 22 & 2 & Signed maximum. \\
|
287 |
|
|
max\_u & 23 & 2 & Maximum. Unsigned for integers, abs for f.p. \\
|
288 |
|
|
and & 26 & 2 & src1 \& src2. \\
|
289 |
|
|
or & 27 & 2 & src1 \textbar{} src2. \\
|
290 |
|
|
xor & 28 & 2 & src1 \^{} src2. \\
|
291 |
|
|
mul\_2pow & 32 & 2 & src1 * $2^{src2}$. Multiply by integer power of 2. Floating point only. \\
|
292 |
|
|
shift\_left & 32 & 2 & src1 $<<$ src2. Shift left. Integer only. \\
|
293 |
|
|
rotate & 33 & 2 & Rotate left if src2 positive, right if negative. \\
|
294 |
|
|
shift\_right\_s & 34 & 2 & src1 $>>$ src2. Integer shift right with sign extension.\\
|
295 |
|
|
shift\_right\_u & 35 & 2 & src1 $>>$ src2. Integer shift right with zero extension.\\
|
296 |
|
|
clear\_bit & 36 & 2 & Clear bit. src1 \& \~{} (1 $<<$ src2). \\
|
297 |
|
|
set\_bit & 37 & 2 & Set bit. src1 \textbar{} (1 $<<$ src2). \\
|
298 |
|
|
toggle\_bit & 38 & 2 & Toggle bit. src1 \^{} (1 $<<$ src2). \\
|
299 |
|
|
test\_bit & 39 & 2 & Test single bit. (src1 $>>$ src2) \& 1. \\
|
300 |
|
|
test\_bits\_and & 40 & 2 & Test if all indicated bits are 1. (src1 \& src2) == src2 \\
test\_bits\_or & 41 & 2 & Test if at least one indicated bit is 1. (src1 \& src2) != 0 \\
|
301 |
|
|
add & 44 & 2 & src1 + src2 (float16. optional). \\
|
302 |
|
|
sub & 45 & 2 & src1 - src2 (float16. optional). \\
|
303 |
|
|
mul & 46 & 2 & src1 * src2 (float16. optional). \\
|
304 |
|
|
mul\_add & 48 & 3 & $\pm$ src1 $\cdot$ src2 $\pm$ src3 (float16. optional). \\
|
305 |
|
|
mul\_add & 49 & 3 & $\pm$ src1 $\cdot$ src2 $\pm$ src3 (optional). \\
|
306 |
|
|
mul\_add2 & 50 & 3 & $\pm$ src1 $\cdot$ src3 $\pm$ src2 (optional). \\
|
307 |
|
|
add\_add & 51 & 3 & $\pm$ src1 $\pm$ src2 $\pm$ src3 (optional). \\
|
308 |
|
|
select\_bits & 52 & 3 & src1 \& src3 \textbar{} src2 \& \~{}src3 \\
|
309 |
|
|
funnel\_shift & 53 & 3 & Concatenate src1 and src2 and shift right by src3. \\
|
310 |
|
|
userdef56 - userdef62
|
311 |
|
|
& 56-62 & 2 & Reserved for user-defined instructions. \\
|
312 |
|
|
undef & 63 & 2 & Undefined code. Generates trap. \\
|
313 |
|
|
\hline
|
314 |
|
|
\end{longtable}
|
315 |
|
|
|
316 |
|
|
|
317 |
|
|
\section{List of single-format instructions}
|
318 |
|
|
These instructions are mostly available in only one or a few formats.
|
319 |
|
|
|
320 |
|
|
\begin{longtable} {|p{25mm}|p{14mm}|p{10mm}|p{95mm}|}
|
321 |
|
|
\caption{List of single-format instructions with general purpose registers}
|
322 |
|
|
\label{table:ListOfSingleFormatInstructionsGP} \\
|
323 |
|
|
\endfirsthead
|
324 |
|
|
\endhead
|
325 |
|
|
\hline
|
326 |
|
|
\bfseries Instruction & \bfseries Format &\bfseries OP1 & \bfseries Description \\
|
327 |
|
|
\hline
|
328 |
|
|
move & 1.1 C & 0 & Move 16-bit sign-extended constant to 32-bit general purpose register. \\
|
329 |
|
|
move & 1.1 C & 1 & Move 16-bit sign-extended constant to 64-bit general purpose register. \\
|
330 |
|
|
move & 1.1 C & 3 & Move 16-bit zero-extended constant to 64-bit general purpose register. \\
|
331 |
|
|
move & 1.1 C & 4 & RD = IM2 \textless\textless{} IM1. Sign-extend IM2 to 32 bits and shift left by the unsigned value IM1. \\
|
332 |
|
|
move & 1.1 C & 5 & RD = IM2 \textless\textless{} IM1. Sign-extend IM2 to 64 bits and shift left by the unsigned value IM1. \\
|
333 |
|
|
add & 1.1 C & 6 & Add 16-bit sign-extended constant to 32-bit general purpose register. \\
|
334 |
|
|
mul & 1.1 C & 8 & Multiply 32-bit general purpose register by 16-bit sign-extended constant. \\
|
335 |
|
|
add & 1.1 C & 10 & RD += IM2 \textless\textless{} IM1. Sign-extend IM2 to 32 bits, shift left by the unsigned value IM1, add to RD. \\
|
336 |
|
|
add & 1.1 C & 11 & RD += IM2 \textless\textless{} IM1. Sign-extend IM2 to 64 bits, shift left by the unsigned value IM1, add to RD. \\
|
337 |
|
|
and & 1.1 C & 12 & RD \&= IM2 \textless\textless{} IM1. Sign-extend IM2 to 32 bits, shift left by the unsigned value IM1, AND with RD. \\
|
338 |
|
|
and & 1.1 C & 13 & RD \&= IM2 \textless\textless{} IM1. Sign-extend IM2 to 64 bits, shift left by the unsigned value IM1, AND with RD. \\
|
339 |
|
|
or & 1.1 C & 14 & RD \textbar{}= IM2 \textless\textless{} IM1. Sign-extend IM2 to 32 bits, shift left by the unsigned value IM1, OR with RD. \\
|
340 |
|
|
or & 1.1 C & 15 & RD \textbar{}= IM2 \textless\textless{} IM1. Sign-extend IM2 to 64 bits, shift left by the unsigned value IM1, OR with RD. \\
|
341 |
|
|
xor & 1.1 C & 16 & RD \^{}= IM2 \textless\textless{} IM1. Sign-extend IM2 to 32 bits, shift left by the unsigned value IM1, XOR with RD. \\
|
342 |
|
|
xor & 1.1 C & 17 & RD \^{}= IM2 \textless\textless{} IM1. Sign-extend IM2 to 64 bits, shift left by the unsigned value IM1, XOR with RD. \\
|
343 |
|
|
add & 1.1 C & 18 & RD += (IM1,IM2) \textless\textless{} 16. Shift 16-bit zero-extended constant left by 16 and add to 32-bit general purpose register. \\
|
344 |
|
|
|
345 |
|
|
abs & 1.8 B & 0 & Absolute value of integer. IM1 determines handling of overflow: 0: wrap around, 1: saturate, 2: zero. \\
|
346 |
|
|
%shift\_add & 1.8 B & 1 & Shift and add. RD += RS \textless\textless{} IM1 \\
|
347 |
|
|
bitscan & 1.8 B & 2 & Bit scan forward or reverse. Find index to first or last set bit. \\
|
348 |
|
|
roundp2 & 1.8 B & 3 & Round up or down to nearest power of 2. \\
|
349 |
|
|
popcount & 1.8 B & 4 & Count the number of bits that are 1.\\
|
350 |
|
|
read\_spec & 1.8 B & 32 & Read special register RS into g. p. register RD. \\
|
351 |
|
|
write\_spec & 1.8 B & 33 & Write g. p. register RS to special register RD. \\
|
352 |
|
|
read\_capabi-lities & 1.8 B & 34 & Read capabilities register RS into g. p. register RD. \\
|
353 |
|
|
write\_capabi-lities & 1.8 B & 35 & Write g. p. register RS to capabilities register RD. \\
|
354 |
|
|
read\_perf & 1.8 B & 36 & Read performance counter. \\
|
355 |
|
|
read\_perfs & 1.8 B & 37 & Read performance counter, serializing. \\
|
356 |
|
|
read\_sys & 1.8 B & 38 & Read system register RS into g. p. register RD. \\
|
357 |
|
|
write\_sys & 1.8 B & 39 & Write g. p. register RS to system register RD. \\
|
358 |
|
|
push & 1.8 B & 56 & Push g. p. register RS to stack with pointer RD. \\
|
359 |
|
|
pop & 1.8 B & 57 & Pop g. p. register RS from stack with pointer RD. \\
|
360 |
|
|
input & 1.8 B & 62 & Read RD from input port with address IM1 or RS. (privileged instruction) \\
|
361 |
|
|
output & 1.8 B & 63 & Write RD to output port with address IM1 or RS. (privileged instruction) \\
|
362 |
|
|
|
363 |
|
|
truth\_tab3 & 2.0.6 E & 8.1 & Boolean function of three inputs, given by a truth table. \\
|
364 |
|
|
|
365 |
|
|
move\_bits & 2.0.7 E & 0.1 & Replace one or more contiguous bits at one position of RS with contiguous bits from another position of RT. Optional. \\
|
366 |
|
|
|
367 |
|
|
move & 2.9 A & 0 & Load 32-bit constant into the high part of a general purpose register. The low part is zero. RD = IM2 \textless\textless{} 32. \\
|
368 |
|
|
insert\_hi & 2.9 A & 1 & Insert 32-bit constant into the high part of a general purpose register, leaving the low part unchanged.
|
369 |
|
|
RD = (RT \& 0xFFFFFFFF) \textbar{} (IM2 \textless\textless{} 32). \\
|
370 |
|
|
add & 2.9 A & 2 & Add zero-extended 32-bit constant to general purpose register. \\
|
371 |
|
|
sub & 2.9 A & 3 & Subtract zero-extended 32-bit constant from general purpose register. \\
|
372 |
|
|
add & 2.9 A & 4 & Add 32-bit constant to high part of general purpose register. RD = RT + (IM2 \textless\textless{} 32). \\
|
373 |
|
|
and & 2.9 A & 5 & AND high part of general purpose register with 32-bit constant. RD = RT \& (IM2 \textless\textless{} 32). \\
|
374 |
|
|
or & 2.9 A & 6 & OR high part of general purpose register with 32-bit constant. RD = RT \textbar{} (IM2 \textless\textless{} 32). \\
|
375 |
|
|
xor & 2.9 A & 7 & XOR high part of general purpose register with 32-bit constant. RD = RT \^{} (IM2 \textless\textless{} 32). \\
|
376 |
|
|
address & 2.9 A & 32 & RD = RT + IM2, RT can be THREADP (28), DATAP (29) or IP (30). \\
|
377 |
|
|
\hline
|
378 |
|
|
\end{longtable}
|
379 |
|
|
|
380 |
|
|
|
381 |
|
|
\begin{longtable} {|p{25mm}|p{14mm}|p{10mm}|p{95mm}|}
|
382 |
|
|
\caption{List of single-format instructions with vector registers and mixed register types}
|
383 |
|
|
\label{table:ListOfSingleFormatInstructionsVector} \\
|
384 |
|
|
\endfirsthead
|
385 |
|
|
\endhead
|
386 |
|
|
\hline
|
387 |
|
|
\bfseries Instruction & \bfseries Format &\bfseries OP1. OP2 & \bfseries Description \\
|
388 |
|
|
\hline
|
389 |
|
|
get\_len & 1.2 A & 0 & Get length of vector register RT into general purpose register RD. \\
|
390 |
|
|
get\_num & 1.2 A & 1 & Get length of vector register RT divided by the operand size. \\
|
391 |
|
|
set\_len & 1.2 A & 2 & RD = vector register RS with length changed to value of RT. \\
|
392 |
|
|
set\_num & 1.2 A & 3 & Change the length of vector register RS to RT$\cdot$OS. \\
|
393 |
|
|
insert & 1.2 A & 4 & Replace one element in vector RD, starting at offset RT$\cdot$OS, with scalar RS. \\
|
394 |
|
|
extract & 1.2 A & 5 & Extract one element from vector RS, starting at offset RT$\cdot$OS, with size OS into scalar in vector register RD. \\
|
395 |
|
|
broad & 1.2 A & 6 & Broadcast first element of vector RS into all elements of RD with length RT bytes. \\
|
396 |
|
|
compress\_ sparse& 1.2 A & 8 & Compress sparse vector elements indicated by mask bits into contiguous vector. (optional). \\
|
397 |
|
|
expand\_sparse& 1.2 A & 9 & Expand contiguous vector into sparse vector with positions indicated by mask bits. RT = length of output vector. (optional). \\
|
398 |
|
|
|
399 |
|
|
bits2bool & 1.2 A & 12 & The lower n bits of RT are unpacked into a boolean vector RD with length RS, with one bit in each element, where n = RS / OS. \\
|
400 |
|
|
|
401 |
|
|
shift\_expand & 1.2 A & 16 & Shift vector RS up by RT bytes and extend the vector length by RT. The lower RT bytes of RD will be zero. \\
|
402 |
|
|
shift\_reduce & 1.2 A & 17 & Shift vector RS down RT bytes and reduce the length by RT. The lower RT bytes are lost. \\
|
403 |
|
|
shift\_up & 1.2 A & 18 & Shift elements of vector RS up RT elements. The lower RT elements of RD will be zero, the upper RT elements are lost. \\
|
404 |
|
|
shift\_down & 1.2 A & 19 & Shift elements of vector RS down RT elements. The upper RT elements of RD will be zero, the lower RT elements are lost. \\
|
405 |
|
|
%rotate\_up & 1.2 A & 20 & Rotate vector up one element. Optional. \\
|
406 |
|
|
%rotate\_down & 1.2 A & 21 & Rotate vector down one element. Optional. \\
|
407 |
|
|
|
408 |
|
|
div\_ex & 1.2 A & 24 & Divide vector of double-size signed integers RS by signed integers RT. RS has element size 2$\cdot$OS. These are divided by the even-numbered
|
409 |
|
|
elements of RT with size OS. The truncated results are stored in
|
410 |
|
|
the even-numbered elements of RD. The remainders are stored in
|
411 |
|
|
the odd-numbered elements of RD. (Optional for vectors). \\
|
412 |
|
|
div\_ex\_u & 1.2 A & 25 & Same, with unsigned integers. (Optional for vectors). \\
|
413 |
|
|
mul\_ex & 1.2 A & 26 & Multiply even-numbered signed integer vector elements to double size result. \\
|
414 |
|
|
mul\_ex\_u & 1.2 A & 27 & Multiply even-numbered unsigned integer vector elements to double size result. \\
|
415 |
|
|
sqrt & 1.2 A & 28 & Square root (floating point, optional). \\
|
416 |
|
|
|
417 |
|
|
add\_ss & 1.2 A & 32 & Add integer vectors, signed with saturation (optional). \\
|
418 |
|
|
add\_us & 1.2 A & 33 & Add integer vectors, unsigned with saturation (optional). \\
|
419 |
|
|
sub\_ss & 1.2 A & 34 & Subtract integer vectors, signed with saturation (optional). \\
|
420 |
|
|
sub\_us & 1.2 A & 35 & Subtract integer vectors, unsigned with saturation (optional). \\
|
421 |
|
|
mul\_ss & 1.2 A & 36 & Multiply integer vectors, signed with saturation (optional). \\
|
422 |
|
|
mul\_us & 1.2 A & 37 & Multiply integer vectors, unsigned with saturation (optional). \\
|
423 |
|
|
add\_oc & 1.2 A & 38 & Add with overflow check (optional). \\
|
424 |
|
|
sub\_oc & 1.2 A & 39 & Subtract with overflow check (optional). \\
|
425 |
|
|
mul\_oc & 1.2 A & 40 & Multiply with overflow check (optional). \\
|
426 |
|
|
div\_oc & 1.2 A & 41 & Divide with overflow check (optional). \\
|
427 |
|
|
add\_c & 1.2 A & 42 & Add with carry. Vector has two elements. The upper element is used as carry on input and output (optional). \\
|
428 |
|
|
sub\_b & 1.2 A & 43 & Subtract with borrow. Vector has two elements. The upper element is used as borrow on input and output (optional). \\
|
429 |
|
|
|
430 |
|
|
read\_spev & 1.2 A & 56 & Read special vector register. Length RT. \\
|
431 |
|
|
read\_call\_ stack & 1.2 A & 58 & Read internal call stack. RD = vector register destination of length RS, RT-RS = internal address (privileged instruction). \\
|
432 |
|
|
write\_call\_ stack & 1.2 A & 59 & Write internal call stack. RD = vector register source of length RS, RT-RS = internal address (privileged instruction). \\
|
433 |
|
|
|
434 |
|
|
read\_memory\_ map & 1.2 A & 60 & Read memory map. RD = vector register destination of length RS, RT-RS = internal address (privileged instruction). \\
|
435 |
|
|
write\_memory\_ map & 1.2 A & 61 & Write memory map. RD = vector register source of length RS, RT-RS = internal address (privileged instruction). \\
|
436 |
|
|
|
437 |
|
|
input & 1.2 A & 62 & Read from input port. RD = vector register, RT = port address, RS = vector length (privileged instruction). \\
|
438 |
|
|
output & 1.2 A & 63 & Write to output port. RD = vector register source operand, RT = port address, RS = vector length (privileged instruction). \\
|
439 |
|
|
|
440 |
|
|
gp2vec & 1.3 B & 0 & Move value of general purpose register RS to scalar in vector register RD. \\
|
441 |
|
|
|
442 |
|
|
vec2gp & 1.3 B & 1 & Move value of first element of vector register RS to general purpose register RD. \\
|
443 |
|
|
|
444 |
|
|
make\_sequen-ce& 1.3 B & 3 & Make a vector with RS sequential numbers. First value is IM1. \\
|
445 |
|
|
|
446 |
|
|
insert & 1.3 B & 4 & Replace one element in vector RD, starting at offset IM1$\cdot$OS, with first element in RS. \\
|
447 |
|
|
|
448 |
|
|
extract & 1.3 B & 5 & Extract one element from vector RS, starting at offset IM1$\cdot$OS into a scalar in vector register RD. \\
|
449 |
|
|
|
450 |
|
|
compress & 1.3 B & 6 & Compress vector to half the length and half the element size. Double precision $\rightarrow$ single precision, 64-bit
|
451 |
|
|
integer $\rightarrow$ 32-bit integer, etc. \\
|
452 |
|
|
|
453 |
|
|
expand & 1.3 B & 7 & Expand vector to double the length and double the element size. Half precision $\rightarrow$ single precision, 32-bit integer $\rightarrow$ 64-bit integer, etc. \\
|
454 |
|
|
|
455 |
|
|
float2int & 1.3 B & 12 & Conversion of floating point to integer with the same operand size. The rounding mode is specified in IM1. \\
|
456 |
|
|
int2float & 1.3 B & 13 & Conversion of integer to floating point with same operand size. \\
|
457 |
|
|
|
458 |
|
|
round & 1.3 B & 14 & Round floating point to integer in floating point representation. The rounding mode is specified in IM1. \\
|
459 |
|
|
round2n & 1.3 B & 15 & Round to nearest multiple of $2^n$. \newline
|
460 |
|
|
RD = $2^n\cdot$ round($2^{-n}\cdot$ RS). $n$ is a signed integer constant in IM1 (optional). \\
|
461 |
|
|
abs & 1.3 B & 16 & Absolute value of integer. IM1 determines handling of overflow: 0: wrap around, 1: saturate, 2: zero. \\
|
462 |
|
|
|
463 |
|
|
fp\_category & 1.3 B & 17 & Check if floating point numbers belong to the categories indicated by constant. \\
|
464 |
|
|
|
465 |
|
|
broad & 1.3 B & 18 & Broadcast 8-bit constant into all elements of RD with length RS (31 in RS field gives scalar output). \\
|
466 |
|
|
|
467 |
|
|
broadcast\_ max & 1.3 B & 19 & Broadcast 8-bit constant into all elements of RD with maximum vector length. \\
|
468 |
|
|
|
469 |
|
|
byte\_reverse & 1.3 B & 20 & Reverse the order of bytes in each element of vector. \\
|
470 |
|
|
bit\_reverse & 1.3 B & 20 & Reverse the order of bits in each element of vector (optional). \\
|
471 |
|
|
|
472 |
|
|
bitscan & 1.3 B & 21 & Bit scan forward or reverse. Find index to the lowest or highest set bit. \\
|
473 |
|
|
|
474 |
|
|
popcount & 1.3 B & 22 & Count the number of bits that are 1 (optional for vectors). \\
|
475 |
|
|
|
476 |
|
|
bool2bits & 1.3 B & 25 & A boolean vector with n elements is packed into the lower n bits of RD, taking bit 0 of each element. The length of RD is at least sufficient to contain n bits. \\
|
477 |
|
|
|
478 |
|
|
bool\_reduce & 1.3 B & 26 & An integer vector is reduced by combining bit 0 of all elements. The output is a scalar integer where bit 0 is the
|
479 |
|
|
AND combination of all the bits, and bit 1 is the OR combination of
|
480 |
|
|
all the bits. The remaining bits are reserved for future use. \\
|
481 |
|
|
|
482 |
|
|
category\_ reduce & 1.3 B & 26 & A floating point vector is reduced to a scalar integer where each bit indicates that the source vector contains at least one element in a certain category, such as NAN, zero, normal positive, etc. \\
|
483 |
|
|
|
484 |
|
|
push & 1.3 B & 56 & Push vector register RS to stack with pointer RD. \\
|
485 |
|
|
pop & 1.3 B & 57 & Pop vector register RS from stack with pointer RD. \\
|
486 |
|
|
clear & 1.3 B & 58 & Clear vector register RS. \\
|
487 |
|
|
|
488 |
|
|
move & 1.4 C & 0 & Move 16-bit integer constant to 16-bit scalar (optional). \\
|
489 |
|
|
add & 1.4 C & 1 & Add broadcast 16-bit constant to 16-bit vector elements (optional). \\
|
490 |
|
|
and & 1.4 C & 2 & AND broadcast 16-bit constant with 16-bit vector elements (optional). \\
|
491 |
|
|
or & 1.4 C & 3 & OR broadcast 16-bit constant with 16-bit vector elements (optional). \\
|
492 |
|
|
xor & 1.4 C & 4 & XOR broadcast 16-bit constant with 16-bit vector elements (optional). \\
|
493 |
|
|
|
494 |
|
|
move & 1.4 C & 8 & RD = IM2 \textless\textless{} IM1. Sign-extend IM2 to 32 bits and shift left by the unsigned value IM1 to make a 32-bit scalar (optional). \\
|
495 |
|
|
move & 1.4 C & 9 & RD = IM2 \textless\textless{} IM1. Sign-extend IM2 to 64 bits and shift left by the unsigned value IM1 to make a 64-bit scalar (optional). \\
|
496 |
|
|
add & 1.4 C & 10 & RD += IM2 \textless\textless{} IM1. Add broadcast shifted signed constant to 32-bit vector elements (optional). \\
|
497 |
|
|
add & 1.4 C & 11 & RD += IM2 \textless\textless{} IM1. Add broadcast shifted signed constant to 64-bit vector elements (optional). \\
|
498 |
|
|
and & 1.4 C & 12 & RD \&= IM2 \textless\textless{} IM1. AND broadcast shifted signed constant with 32-bit vector elements (optional). \\
|
499 |
|
|
and & 1.4 C & 13 & RD \&= IM2 \textless\textless{} IM1. AND broadcast shifted signed constant with 64-bit vector elements (optional). \\
|
500 |
|
|
or & 1.4 C & 14 & RD \textbar{}= IM2 \textless\textless{} IM1. OR broadcast shifted signed constant with 32-bit vector elements (optional). \\
|
501 |
|
|
or & 1.4 C & 15 & RD \textbar{}= IM2 \textless\textless{} IM1. OR broadcast shifted signed constant with 64-bit vector elements (optional). \\
|
502 |
|
|
xor & 1.4 C & 16 & RD \^{}= IM2 \textless\textless{} IM1. XOR broadcast shifted signed constant with 32-bit vector elements (optional). \\
|
503 |
|
|
xor & 1.4 C & 17 & RD \^{}= IM2 \textless\textless{} IM1. XOR broadcast shifted signed constant with 64-bit vector elements (optional). \\
|
504 |
|
|
|
505 |
|
|
move & 1.4 C & 32 & Move converted half precision floating point constant to single
|
506 |
|
|
precision scalar (optional). \\
|
507 |
|
|
move & 1.4 C & 33 & Move converted half precision floating point constant to double
|
508 |
|
|
precision scalar (optional). \\
|
509 |
|
|
add & 1.4 C & 34 & Add broadcast half precision floating point constant to single
|
510 |
|
|
precision vector (optional). \\
|
511 |
|
|
add & 1.4 C & 35 & Add broadcast half precision floating point constant to double
|
512 |
|
|
precision vector (optional). \\
|
513 |
|
|
mul & 1.4 C & 36 & Multiply broadcast half precision floating point constant with single precision vector (optional). \\
|
514 |
|
|
mul & 1.4 C & 37 & Multiply broadcast half precision floating point constant with double precision vector (optional). \\
|
515 |
|
|
add\_h & 1.4 C & 40 & add constant to half precision vector (optional). \\
|
516 |
|
|
mul\_h & 1.4 C & 41 & multiply half precision vector with constant (optional). \\
|
517 |
|
|
concatenate & 2.2.6 E & 0.1 & A vector RU of length RT and a vector RS of length RT are concatenated into a vector RD of length 2$\cdot$RT. \\
|
518 |
|
|
permute & 2.2.6 E & 1.1 & The vector elements of RU are permuted within each block of size RT bytes, using indices in RS. Each index is relative to the
|
519 |
|
|
beginning of a block. An index out of range produces zero. The
|
520 |
|
|
maximum block size is implementation dependent. \\
|
521 |
|
|
interleave & 2.2.6 E & 2.1 & Interleave elements of vectors RU and RS of length RT/2 to produce vector RD of length RT. Even-numbered elements of the destination come from RU and odd-numbered elements from RS. (optional). \\
|
522 |
|
|
truth\_tab3 & 2.2.6 E & 8.1 & Boolean function of three inputs, given by a truth table. \\
|
523 |
|
|
|
524 |
|
|
move\_bits & 2.2.7 E & 0.1 & Replace one or more contiguous bits at one position of RS with contiguous bits from another position of RT (optional). \\
|
525 |
|
|
mask\_length & 2.2.7 E & 1.1 & Make mask with true in the first RT bytes. Option bits in IM2. \\
|
526 |
|
|
repeat\_block & 2.2.7 E & 8.1 & Repeat a block of data to make a longer vector. RS is input vector containing data block to repeat. IM2 is length in bytes of the block to repeat (must be a multiple of 4). RT is the length of destination vector RD. (optional). \\
|
527 |
|
|
repeat\_within \_blocks & 2.2.7 E & 9.1 & Broadcast the first element of each block of data in a vector to the entire block. RS is input vector containing data blocks. IM2 is length in bytes of each block (must be a multiple of the operand size). RT is length of destination vector RD. The operand size must be at least 4 bytes. (optional). \\
|
528 |
|
|
|
529 |
|
|
load\_hi & 2.6 A & 0 & Make vector of two elements. dest[0] = 0, dest[1] = IM2. \\
|
530 |
|
|
insert\_hi & 2.6 A & 1 & Make vector of two elements. dest[0] = src1[0], dest[1] = IM2. \\
|
531 |
|
|
make\_mask & 2.6 A & 2 & Make vector where bit 0 of each element comes from bits in IM2, the remaining bits come from RT. \\
|
532 |
|
|
replace & 2.6 A & 3 & Replace elements in RT by constant IM2. \\
|
533 |
|
|
replace\_even & 2.6 A & 4 & Replace even-numbered elements in RT by constant
|
534 |
|
|
IM2. \\
|
535 |
|
|
replace\_odd & 2.6 A & 5 & Replace odd-numbered elements in RT by constant
|
536 |
|
|
IM2. \\
|
537 |
|
|
broad & 2.6 A & 6 & Broadcast 32-bit or float32 constant into all elements of RD with length RT (31 in RT field gives scalar output). \\
|
538 |
|
|
permute & 2.6 A & 8 & The vector elements of RS are permuted within each block of size RT bytes. The 4$\cdot$n bits of IM2 are used as index with 4 bits for
|
539 |
|
|
each element in blocks of size n. The same pattern is used in each
|
540 |
|
|
block. The number of elements in each block, n = RT / OS $\leq$ 8. \\
|
541 |
|
|
replace & 3.1 A & 32 & Replace elements in RT by constant IM2,IM3. \\
|
542 |
|
|
broad & 3.1 A & 33 & Broadcast 64-bit or float64 constant into all elements of RD with length RT (31 in RT field gives scalar output). \\
|
543 |
|
|
\hline
|
544 |
|
|
\end{longtable}
|
545 |
|
|
|
546 |
|
|
\begin{longtable} {|p{25mm}|p{14mm}|p{10mm}|p{95mm}|}
|
547 |
|
|
\caption{List of single-format instructions with memory operands.}
|
548 |
|
|
\label{table:ListOfSingleFormatInstructionsMemory} \\
|
549 |
|
|
\endfirsthead
|
550 |
|
|
\endhead
|
551 |
|
|
\hline
|
552 |
|
|
\bfseries Instruction & \bfseries Format &\bfseries OP1, OP2 & \bfseries Description \\
|
553 |
|
|
\hline
|
554 |
|
|
store & 2.5 B & 8 & Store 32-bit constant IM2 to memory operand [RS+IM1] (optional). \\
|
555 |
|
|
|
556 |
|
|
fence & 2.5 B & 16 & Memory fence at address [RS+IM2]. Read, write, or full fence is indicated by IM1. \\
|
557 |
|
|
|
558 |
|
|
compare\_swap & 2.5 B & 18 & Atomic compare and exchange with address [RS+IM2].\\
|
559 |
|
|
|
560 |
|
|
read\_insert & 2.5 A & 32 & Replace one element in vector RD, starting at offset
|
561 |
|
|
RT$\cdot$OS, with scalar memory operand [RS+IM2] (optional). \\
|
562 |
|
|
|
563 |
|
|
extract\_store& 2.5 A & 40 & Extract one element from vector RD, starting at offset RT$\cdot$OS, with size OS into memory operand [RS+IM2] (optional). \\
|
564 |
|
|
|
565 |
|
|
\hline
|
566 |
|
|
\end{longtable}
|
567 |
|
|
\vspace{4mm}
|
568 |
|
|
|
569 |
|
|
|
570 |
|
|
|
571 |
|
|
\section{List of control transfer instructions}
|
572 |
|
|
|
573 |
|
|
\begin{longtable}
|
574 |
|
|
{|p{12mm}|p{16mm}|p{60mm}|p{55mm}|}
|
575 |
|
|
%\nopagebreak
|
576 |
|
|
\caption{Condition codes for control transfer instructions with integer operands in general purpose registers }
|
577 |
|
|
\label{table:controlTransferInstructions}
|
578 |
|
|
\endfirsthead
|
579 |
|
|
\endhead
|
580 |
|
|
\hline
|
581 |
|
|
\bfseries OPJ & \bfseries bit 0 \newline of OPJ & \bfseries Instruction & \bfseries Comment \\
|
582 |
|
|
\hline
|
583 |
|
|
0-7 & part of offset & Unconditional jump with 24-bit offset (jump) & Format 1.7 D. Bits 0-2 of OPJ are part of the offset \\
|
584 |
|
|
\hline
|
585 |
|
|
8-15 & part of offset & Unconditional call with 24-bit offset (call) & Format 1.7 D. Bits 0-2 of OPJ are part of the offset \\
|
586 |
|
|
\hline
|
587 |
|
|
0-1 & invert & sub/jump\_zero, \newline sub/jump\_nzero & Not format 1.7. Not floating point \\
|
588 |
|
|
\hline
|
589 |
|
|
2-3 & invert & sub/jump\_neg, \newline sub/jump\_nneg & Not format 1.7. Not floating point \\
|
590 |
|
|
\hline
|
591 |
|
|
4-5 & invert & sub/jump\_pos, \newline sub/jump\_npos & Not format 1.7. Not floating point \\
|
592 |
|
|
\hline
|
593 |
|
|
6-7 & invert & sub/jump\_overfl, \newline sub/jump\_noverfl & Not format 1.7. Not floating point \\
|
594 |
|
|
\hline
|
595 |
|
|
8-9 & invert & sub/jump\_borrow, \newline sub/jump\_nborrow & Not format 1.7. Not floating point \\
|
596 |
|
|
\hline
|
597 |
|
|
10-11 & invert & and/jump\_zero, \newline and/jump\_nzero & Not format 1.7 \\
|
598 |
|
|
\hline
|
599 |
|
|
12-13 & invert & or/jump\_zero, \newline or/jump\_nzero & Not format 1.7 \\
|
600 |
|
|
\hline
|
601 |
|
|
14-15 & invert & xor/jump\_zero, \newline xor/jump\_nzero & Not format 1.7 \\
|
602 |
|
|
\hline
|
603 |
|
|
16-17 & invert & add/jump\_zero, \newline add/jump\_nzero & Not floating point \\
|
604 |
|
|
\hline
|
605 |
|
|
18-19 & invert & add/jump\_neg, \newline add/jump\_nneg & Not floating point \\
|
606 |
|
|
\hline
|
607 |
|
|
20-21 & invert & add/jump\_pos, \newline add/jump\_npos & Not floating point \\
|
608 |
|
|
\hline
|
609 |
|
|
22-23 & invert & add/jump\_overfl, \newline add/jump\_noverfl & Not floating point \\
|
610 |
|
|
\hline
|
611 |
|
|
24-25 & invert & add/jump\_carry, \newline add/jump\_ncarry & Not floating point \\
|
612 |
|
|
\hline
|
613 |
|
|
26-27 & invert & test\_bit/jump\_true, \newline test\_bit/jump\_false & \\
|
614 |
|
|
\hline
|
615 |
|
|
28-29 & invert & test\_bits\_and/jump\_true, \newline test\_bits\_and/jump\_false & \\
|
616 |
|
|
\hline
|
617 |
|
|
30-31 & invert & test\_bits\_or/jump\_true, \newline test\_bits\_or/jump\_false & \\
|
618 |
|
|
\hline
|
619 |
|
|
32-33 & invert & compare/jump\_equal, \newline compare/jump\_nequal & \\
|
620 |
|
|
\hline
|
621 |
|
|
34-35 & invert & compare/jump\_sbelow, \newline compare/jump\_saboveeq & \\
|
622 |
|
|
\hline
|
623 |
|
|
36-37 & invert & compare/jump\_sabove, \newline compare/jump\_sbeloweq & \\
|
624 |
|
|
\hline
|
625 |
|
|
38-39 & invert & compare/jump\_ubelow, \newline compare/jump\_uaboveeq & \\
|
626 |
|
|
\hline
|
627 |
|
|
40-41 & invert & compare/jump\_uabove, \newline compare/jump\_ubeloweq & \\
|
628 |
|
|
\hline
|
629 |
|
|
42-47 & invert & Reserved for future use. & \\
|
630 |
|
|
\hline
|
631 |
|
|
|
632 |
|
|
48-49 & invert & increment\_compare/jump\_below, \newline /jump\_aboveeq & \\
|
633 |
|
|
\hline
|
634 |
|
|
50-51 & invert & increment\_compare/jump\_above, \newline /jump\_beloweq & \\
|
635 |
|
|
\hline
|
636 |
|
|
52-53 & invert & sub\_maxlen/jump\_pos, \newline sub\_maxlen/jump\_npos & \\
|
637 |
|
|
\hline
|
638 |
|
|
54-57 & & Reserved for future use. & \\
|
639 |
|
|
\hline
|
640 |
|
|
58-59 & 0 jump \newline 1 call & Indirect jump or call with memory operand. & Format 1.6 B and 2.5.2. \\
|
641 |
|
|
\hline
|
642 |
|
|
58-59 & 0 jump \newline 1 call & Unconditional direct jump or call & Format 2.5.4 and 3.1.1. \\
|
643 |
|
|
\hline
|
644 |
|
|
60-61 & 0 jump\_ relative \newline 1 call\_ relative & Jump or call with relative address in memory, table index, and arbitrary reference point &
|
645 |
|
|
Format 1.6 A and 2.5.2 \\
|
646 |
|
|
\hline
|
647 |
|
|
60-61 & 0 jump \newline 1 call & Indirect jump or call to value of register & Format 1.7 C \\
|
648 |
|
|
\hline
|
649 |
|
|
62 & 0 & return & Format 1.6 C \\
|
650 |
|
|
\hline
|
651 |
|
|
62 & 0 & sys\_return & Format 1.7 C \\
|
652 |
|
|
\hline
|
653 |
|
|
63 & 1 & sys\_call. ID in register & Format 1.6 A \\
|
654 |
|
|
\hline
|
655 |
|
|
63 & 1 & sys\_call. ID in constants & Format 2.5.7 and 3.1.1. \\
|
656 |
|
|
\hline
|
657 |
|
|
63 & 1 & trap or filler & Format 1.7 C \\
|
658 |
|
|
\hline
|
659 |
|
|
63 & 1 & Conditional traps & Format 2.5.5. \\
|
660 |
|
|
\hline
|
661 |
|
|
\end{longtable}
|
662 |
|
|
|
663 |
|
|
|
664 |
|
|
\begin{longtable}
|
665 |
|
|
{|p{10mm}|p{14mm}|p{65mm}|p{40mm}|}
|
666 |
|
|
%\nopagebreak
|
667 |
|
|
\caption{Condition codes for control transfer instructions with floating point operands in vector registers }
|
668 |
|
|
\label{table:controlTransferInstructionsFloat}
|
669 |
|
|
\endfirsthead
|
670 |
|
|
\endhead
|
671 |
|
|
\hline
|
672 |
|
|
\bfseries OPJ & \bfseries bit 0 \newline of OPJ & \bfseries Instruction & \bfseries Comment \\
|
673 |
|
|
\hline
|
674 |
|
|
32-33 & invert & compare/jump\_equal, \newline compare/jump\_nequal & false if unordered \\
|
675 |
|
|
\hline
|
676 |
|
|
0-1 & invert & compare/jump\_equal\_uo, \newline compare/jump\_nequal\_uo & true if unordered \\
|
677 |
|
|
\hline
|
678 |
|
|
34-35 & invert & compare/jump\_below, \newline compare/jump\_aboveeq & false if unordered \\
|
679 |
|
|
\hline
|
680 |
|
|
2-3 & invert & compare/jump\_below\_uo, \newline compare/jump\_aboveeq\_uo & true if unordered \\
|
681 |
|
|
\hline
|
682 |
|
|
36-37 & invert & compare/jump\_above, \newline compare/jump\_beloweq & false if unordered \\
|
683 |
|
|
\hline
|
684 |
|
|
4-5 & invert & compare/jump\_above\_uo, \newline compare/jump\_beloweq\_uo & true if unordered \\
|
685 |
|
|
\hline
|
686 |
|
|
38-39 & invert & compare/jump\_abs\_below, \newline compare/jump\_abs\_aboveeq & false if unordered \\
|
687 |
|
|
\hline
|
688 |
|
|
6-7 & invert & compare/jump\_abs\_below\_uo, \newline compare/jump\_abs\_aboveeq\_uo & true if unordered \\
|
689 |
|
|
\hline
|
690 |
|
|
40-41 & invert & compare/jump\_abs\_above, \newline compare/jump\_abs\_beloweq & false if unordered \\
|
691 |
|
|
\hline
|
692 |
|
|
8-9 & invert & compare/jump\_abs\_above\_uo, \newline compare/jump\_abs\_beloweq\_uo & true if unordered \\
|
693 |
|
|
\hline
|
694 |
|
|
24-25 & invert & fp\_category/jump\_true, \newline fp\_category/jump\_false & \\
|
695 |
|
|
\hline
|
696 |
|
|
|
697 |
|
|
|
698 |
|
|
\multicolumn{4}{|c|}{} \\
|
699 |
|
|
\multicolumn{4}{|c|}{ The following instructions treat floating point operands as integers in vector registers: } \\
|
700 |
|
|
\multicolumn{4}{|c|}{} \\
|
701 |
|
|
\hline
|
702 |
|
|
|
703 |
|
|
10-11 & invert & and/jump\_zero, \newline and/jump\_nzero & \\
|
704 |
|
|
\hline
|
705 |
|
|
12-13 & invert & or/jump\_zero, \newline or/jump\_nzero & \\
|
706 |
|
|
\hline
|
707 |
|
|
14-15 & invert & xor/jump\_zero, \newline xor/jump\_nzero & \\
|
708 |
|
|
\hline
|
709 |
|
|
26-27 & invert & test\_bit/jump\_true, \newline test\_bit/jump\_false & \\
|
710 |
|
|
\hline
|
711 |
|
|
28-29 & invert & test\_bits\_and/jump\_true, \newline test\_bits\_and/jump\_false & \\
|
712 |
|
|
\hline
|
713 |
|
|
30-31 & invert & test\_bits\_or/jump\_true, \newline test\_bits\_or/jump\_false & \\
|
714 |
|
|
\hline
|
715 |
|
|
|
716 |
|
|
\end{longtable}
|
717 |
|
|
|
718 |
|
|
|
719 |
|
|
See page~\pageref{descriptionOfControlTransferInstructions} for
|
720 |
|
|
detailed descriptions of control transfer instructions.
|
721 |
|
|
|
722 |
|
|
|
723 |
|
|
\end{document}
|