| 1 |
709 |
jeremybenn |
(* ARM NEON documentation generator.
|
| 2 |
|
|
|
| 3 |
|
|
Copyright (C) 2006, 2007 Free Software Foundation, Inc.
|
| 4 |
|
|
Contributed by CodeSourcery.
|
| 5 |
|
|
|
| 6 |
|
|
This file is part of GCC.
|
| 7 |
|
|
|
| 8 |
|
|
GCC is free software; you can redistribute it and/or modify it under
|
| 9 |
|
|
the terms of the GNU General Public License as published by the Free
|
| 10 |
|
|
Software Foundation; either version 3, or (at your option) any later
|
| 11 |
|
|
version.
|
| 12 |
|
|
|
| 13 |
|
|
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
| 14 |
|
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
| 15 |
|
|
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
| 16 |
|
|
for more details.
|
| 17 |
|
|
|
| 18 |
|
|
You should have received a copy of the GNU General Public License
|
| 19 |
|
|
along with GCC; see the file COPYING3. If not see
|
| 20 |
|
|
.
|
| 21 |
|
|
|
| 22 |
|
|
This is an O'Caml program. The O'Caml compiler is available from:
|
| 23 |
|
|
|
| 24 |
|
|
http://caml.inria.fr/
|
| 25 |
|
|
|
| 26 |
|
|
Or from your favourite OS's friendly packaging system. Tested with version
|
| 27 |
|
|
3.09.2, though other versions will probably work too.
|
| 28 |
|
|
|
| 29 |
|
|
Compile with:
|
| 30 |
|
|
ocamlc -c neon.ml
|
| 31 |
|
|
ocamlc -o neon-docgen neon.cmo neon-docgen.ml
|
| 32 |
|
|
|
| 33 |
|
|
Run with:
|
| 34 |
|
|
/path/to/neon-docgen /path/to/gcc/doc/arm-neon-intrinsics.texi
|
| 35 |
|
|
*)
|
| 36 |
|
|
|
| 37 |
|
|
open Neon
|
| 38 |
|
|
|
| 39 |
|
|
(* The combined "ops" and "reinterp" table. *)
|
| 40 |
|
|
let ops_reinterp = reinterp @ ops
|
| 41 |
|
|
|
| 42 |
|
|
(* Helper functions for extracting things from the "ops" table. *)
|
| 43 |
|
|
let single_opcode desired_opcode () =
|
| 44 |
|
|
List.fold_left (fun got_so_far ->
|
| 45 |
|
|
fun row ->
|
| 46 |
|
|
match row with
|
| 47 |
|
|
(opcode, _, _, _, _, _) ->
|
| 48 |
|
|
if opcode = desired_opcode then row :: got_so_far
|
| 49 |
|
|
else got_so_far
|
| 50 |
|
|
) [] ops_reinterp
|
| 51 |
|
|
|
| 52 |
|
|
let multiple_opcodes desired_opcodes () =
|
| 53 |
|
|
List.fold_left (fun got_so_far ->
|
| 54 |
|
|
fun desired_opcode ->
|
| 55 |
|
|
(single_opcode desired_opcode ()) @ got_so_far)
|
| 56 |
|
|
[] desired_opcodes
|
| 57 |
|
|
|
| 58 |
|
|
let ldx_opcode number () =
|
| 59 |
|
|
List.fold_left (fun got_so_far ->
|
| 60 |
|
|
fun row ->
|
| 61 |
|
|
match row with
|
| 62 |
|
|
(opcode, _, _, _, _, _) ->
|
| 63 |
|
|
match opcode with
|
| 64 |
|
|
Vldx n | Vldx_lane n | Vldx_dup n when n = number ->
|
| 65 |
|
|
row :: got_so_far
|
| 66 |
|
|
| _ -> got_so_far
|
| 67 |
|
|
) [] ops_reinterp
|
| 68 |
|
|
|
| 69 |
|
|
let stx_opcode number () =
|
| 70 |
|
|
List.fold_left (fun got_so_far ->
|
| 71 |
|
|
fun row ->
|
| 72 |
|
|
match row with
|
| 73 |
|
|
(opcode, _, _, _, _, _) ->
|
| 74 |
|
|
match opcode with
|
| 75 |
|
|
Vstx n | Vstx_lane n when n = number ->
|
| 76 |
|
|
row :: got_so_far
|
| 77 |
|
|
| _ -> got_so_far
|
| 78 |
|
|
) [] ops_reinterp
|
| 79 |
|
|
|
| 80 |
|
|
let tbl_opcode () =
|
| 81 |
|
|
List.fold_left (fun got_so_far ->
|
| 82 |
|
|
fun row ->
|
| 83 |
|
|
match row with
|
| 84 |
|
|
(opcode, _, _, _, _, _) ->
|
| 85 |
|
|
match opcode with
|
| 86 |
|
|
Vtbl _ -> row :: got_so_far
|
| 87 |
|
|
| _ -> got_so_far
|
| 88 |
|
|
) [] ops_reinterp
|
| 89 |
|
|
|
| 90 |
|
|
let tbx_opcode () =
|
| 91 |
|
|
List.fold_left (fun got_so_far ->
|
| 92 |
|
|
fun row ->
|
| 93 |
|
|
match row with
|
| 94 |
|
|
(opcode, _, _, _, _, _) ->
|
| 95 |
|
|
match opcode with
|
| 96 |
|
|
Vtbx _ -> row :: got_so_far
|
| 97 |
|
|
| _ -> got_so_far
|
| 98 |
|
|
) [] ops_reinterp
|
| 99 |
|
|
|
| 100 |
|
|
(* The groups of intrinsics. *)
|
| 101 |
|
|
let intrinsic_groups =
|
| 102 |
|
|
[ "Addition", single_opcode Vadd;
|
| 103 |
|
|
"Multiplication", single_opcode Vmul;
|
| 104 |
|
|
"Multiply-accumulate", single_opcode Vmla;
|
| 105 |
|
|
"Multiply-subtract", single_opcode Vmls;
|
| 106 |
|
|
"Subtraction", single_opcode Vsub;
|
| 107 |
|
|
"Comparison (equal-to)", single_opcode Vceq;
|
| 108 |
|
|
"Comparison (greater-than-or-equal-to)", single_opcode Vcge;
|
| 109 |
|
|
"Comparison (less-than-or-equal-to)", single_opcode Vcle;
|
| 110 |
|
|
"Comparison (greater-than)", single_opcode Vcgt;
|
| 111 |
|
|
"Comparison (less-than)", single_opcode Vclt;
|
| 112 |
|
|
"Comparison (absolute greater-than-or-equal-to)", single_opcode Vcage;
|
| 113 |
|
|
"Comparison (absolute less-than-or-equal-to)", single_opcode Vcale;
|
| 114 |
|
|
"Comparison (absolute greater-than)", single_opcode Vcagt;
|
| 115 |
|
|
"Comparison (absolute less-than)", single_opcode Vcalt;
|
| 116 |
|
|
"Test bits", single_opcode Vtst;
|
| 117 |
|
|
"Absolute difference", single_opcode Vabd;
|
| 118 |
|
|
"Absolute difference and accumulate", single_opcode Vaba;
|
| 119 |
|
|
"Maximum", single_opcode Vmax;
|
| 120 |
|
|
"Minimum", single_opcode Vmin;
|
| 121 |
|
|
"Pairwise add", single_opcode Vpadd;
|
| 122 |
|
|
"Pairwise add, single_opcode widen and accumulate", single_opcode Vpada;
|
| 123 |
|
|
"Folding maximum", single_opcode Vpmax;
|
| 124 |
|
|
"Folding minimum", single_opcode Vpmin;
|
| 125 |
|
|
"Reciprocal step", multiple_opcodes [Vrecps; Vrsqrts];
|
| 126 |
|
|
"Vector shift left", single_opcode Vshl;
|
| 127 |
|
|
"Vector shift left by constant", single_opcode Vshl_n;
|
| 128 |
|
|
"Vector shift right by constant", single_opcode Vshr_n;
|
| 129 |
|
|
"Vector shift right by constant and accumulate", single_opcode Vsra_n;
|
| 130 |
|
|
"Vector shift right and insert", single_opcode Vsri;
|
| 131 |
|
|
"Vector shift left and insert", single_opcode Vsli;
|
| 132 |
|
|
"Absolute value", single_opcode Vabs;
|
| 133 |
|
|
"Negation", single_opcode Vneg;
|
| 134 |
|
|
"Bitwise not", single_opcode Vmvn;
|
| 135 |
|
|
"Count leading sign bits", single_opcode Vcls;
|
| 136 |
|
|
"Count leading zeros", single_opcode Vclz;
|
| 137 |
|
|
"Count number of set bits", single_opcode Vcnt;
|
| 138 |
|
|
"Reciprocal estimate", single_opcode Vrecpe;
|
| 139 |
|
|
"Reciprocal square-root estimate", single_opcode Vrsqrte;
|
| 140 |
|
|
"Get lanes from a vector", single_opcode Vget_lane;
|
| 141 |
|
|
"Set lanes in a vector", single_opcode Vset_lane;
|
| 142 |
|
|
"Create vector from literal bit pattern", single_opcode Vcreate;
|
| 143 |
|
|
"Set all lanes to the same value",
|
| 144 |
|
|
multiple_opcodes [Vdup_n; Vmov_n; Vdup_lane];
|
| 145 |
|
|
"Combining vectors", single_opcode Vcombine;
|
| 146 |
|
|
"Splitting vectors", multiple_opcodes [Vget_high; Vget_low];
|
| 147 |
|
|
"Conversions", multiple_opcodes [Vcvt; Vcvt_n];
|
| 148 |
|
|
"Move, single_opcode narrowing", single_opcode Vmovn;
|
| 149 |
|
|
"Move, single_opcode long", single_opcode Vmovl;
|
| 150 |
|
|
"Table lookup", tbl_opcode;
|
| 151 |
|
|
"Extended table lookup", tbx_opcode;
|
| 152 |
|
|
"Multiply, lane", single_opcode Vmul_lane;
|
| 153 |
|
|
"Long multiply, lane", single_opcode Vmull_lane;
|
| 154 |
|
|
"Saturating doubling long multiply, lane", single_opcode Vqdmull_lane;
|
| 155 |
|
|
"Saturating doubling multiply high, lane", single_opcode Vqdmulh_lane;
|
| 156 |
|
|
"Multiply-accumulate, lane", single_opcode Vmla_lane;
|
| 157 |
|
|
"Multiply-subtract, lane", single_opcode Vmls_lane;
|
| 158 |
|
|
"Vector multiply by scalar", single_opcode Vmul_n;
|
| 159 |
|
|
"Vector long multiply by scalar", single_opcode Vmull_n;
|
| 160 |
|
|
"Vector saturating doubling long multiply by scalar",
|
| 161 |
|
|
single_opcode Vqdmull_n;
|
| 162 |
|
|
"Vector saturating doubling multiply high by scalar",
|
| 163 |
|
|
single_opcode Vqdmulh_n;
|
| 164 |
|
|
"Vector multiply-accumulate by scalar", single_opcode Vmla_n;
|
| 165 |
|
|
"Vector multiply-subtract by scalar", single_opcode Vmls_n;
|
| 166 |
|
|
"Vector extract", single_opcode Vext;
|
| 167 |
|
|
"Reverse elements", multiple_opcodes [Vrev64; Vrev32; Vrev16];
|
| 168 |
|
|
"Bit selection", single_opcode Vbsl;
|
| 169 |
|
|
"Transpose elements", single_opcode Vtrn;
|
| 170 |
|
|
"Zip elements", single_opcode Vzip;
|
| 171 |
|
|
"Unzip elements", single_opcode Vuzp;
|
| 172 |
|
|
"Element/structure loads, VLD1 variants", ldx_opcode 1;
|
| 173 |
|
|
"Element/structure stores, VST1 variants", stx_opcode 1;
|
| 174 |
|
|
"Element/structure loads, VLD2 variants", ldx_opcode 2;
|
| 175 |
|
|
"Element/structure stores, VST2 variants", stx_opcode 2;
|
| 176 |
|
|
"Element/structure loads, VLD3 variants", ldx_opcode 3;
|
| 177 |
|
|
"Element/structure stores, VST3 variants", stx_opcode 3;
|
| 178 |
|
|
"Element/structure loads, VLD4 variants", ldx_opcode 4;
|
| 179 |
|
|
"Element/structure stores, VST4 variants", stx_opcode 4;
|
| 180 |
|
|
"Logical operations (AND)", single_opcode Vand;
|
| 181 |
|
|
"Logical operations (OR)", single_opcode Vorr;
|
| 182 |
|
|
"Logical operations (exclusive OR)", single_opcode Veor;
|
| 183 |
|
|
"Logical operations (AND-NOT)", single_opcode Vbic;
|
| 184 |
|
|
"Logical operations (OR-NOT)", single_opcode Vorn;
|
| 185 |
|
|
"Reinterpret casts", single_opcode Vreinterp ]
|
| 186 |
|
|
|
| 187 |
|
|
(* Given an intrinsic shape, produce a string to document the corresponding
|
| 188 |
|
|
operand shapes. *)
|
| 189 |
|
|
let rec analyze_shape shape =
|
| 190 |
|
|
let rec n_things n thing =
|
| 191 |
|
|
match n with
|
| 192 |
|
|
|
| 193 |
|
|
| n -> thing :: (n_things (n - 1) thing)
|
| 194 |
|
|
in
|
| 195 |
|
|
let rec analyze_shape_elt reg_no elt =
|
| 196 |
|
|
match elt with
|
| 197 |
|
|
Dreg -> "@var{d" ^ (string_of_int reg_no) ^ "}"
|
| 198 |
|
|
| Qreg -> "@var{q" ^ (string_of_int reg_no) ^ "}"
|
| 199 |
|
|
| Corereg -> "@var{r" ^ (string_of_int reg_no) ^ "}"
|
| 200 |
|
|
| Immed -> "#@var{0}"
|
| 201 |
|
|
| VecArray (1, elt) ->
|
| 202 |
|
|
let elt_regexp = analyze_shape_elt 0 elt in
|
| 203 |
|
|
"@{" ^ elt_regexp ^ "@}"
|
| 204 |
|
|
| VecArray (n, elt) ->
|
| 205 |
|
|
let rec f m =
|
| 206 |
|
|
match m with
|
| 207 |
|
|
|
| 208 |
|
|
| m -> (analyze_shape_elt (m - 1) elt) :: (f (m - 1))
|
| 209 |
|
|
in
|
| 210 |
|
|
let ops = List.rev (f n) in
|
| 211 |
|
|
"@{" ^ (commas (fun x -> x) ops "") ^ "@}"
|
| 212 |
|
|
| (PtrTo elt | CstPtrTo elt) ->
|
| 213 |
|
|
"[" ^ (analyze_shape_elt reg_no elt) ^ "]"
|
| 214 |
|
|
| Element_of_dreg -> (analyze_shape_elt reg_no Dreg) ^ "[@var{0}]"
|
| 215 |
|
|
| Element_of_qreg -> (analyze_shape_elt reg_no Qreg) ^ "[@var{0}]"
|
| 216 |
|
|
| All_elements_of_dreg -> (analyze_shape_elt reg_no Dreg) ^ "[]"
|
| 217 |
|
|
| Alternatives alts -> (analyze_shape_elt reg_no (List.hd alts))
|
| 218 |
|
|
in
|
| 219 |
|
|
match shape with
|
| 220 |
|
|
All (n, elt) -> commas (analyze_shape_elt 0) (n_things n elt) ""
|
| 221 |
|
|
| Long -> (analyze_shape_elt 0 Qreg) ^ ", " ^ (analyze_shape_elt 0 Dreg) ^
|
| 222 |
|
|
", " ^ (analyze_shape_elt 0 Dreg)
|
| 223 |
|
|
| Long_noreg elt -> (analyze_shape_elt 0 elt) ^ ", " ^
|
| 224 |
|
|
(analyze_shape_elt 0 elt)
|
| 225 |
|
|
| Wide -> (analyze_shape_elt 0 Qreg) ^ ", " ^ (analyze_shape_elt 0 Qreg) ^
|
| 226 |
|
|
", " ^ (analyze_shape_elt 0 Dreg)
|
| 227 |
|
|
| Wide_noreg elt -> analyze_shape (Long_noreg elt)
|
| 228 |
|
|
| Narrow -> (analyze_shape_elt 0 Dreg) ^ ", " ^ (analyze_shape_elt 0 Qreg) ^
|
| 229 |
|
|
", " ^ (analyze_shape_elt 0 Qreg)
|
| 230 |
|
|
| Use_operands elts -> commas (analyze_shape_elt 0) (Array.to_list elts) ""
|
| 231 |
|
|
| By_scalar Dreg ->
|
| 232 |
|
|
analyze_shape (Use_operands [| Dreg; Dreg; Element_of_dreg |])
|
| 233 |
|
|
| By_scalar Qreg ->
|
| 234 |
|
|
analyze_shape (Use_operands [| Qreg; Qreg; Element_of_dreg |])
|
| 235 |
|
|
| By_scalar _ -> assert false
|
| 236 |
|
|
| Wide_lane ->
|
| 237 |
|
|
analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |])
|
| 238 |
|
|
| Wide_scalar ->
|
| 239 |
|
|
analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |])
|
| 240 |
|
|
| Pair_result elt ->
|
| 241 |
|
|
let elt_regexp = analyze_shape_elt 0 elt in
|
| 242 |
|
|
let elt_regexp' = analyze_shape_elt 1 elt in
|
| 243 |
|
|
elt_regexp ^ ", " ^ elt_regexp'
|
| 244 |
|
|
| Unary_scalar _ -> "FIXME Unary_scalar"
|
| 245 |
|
|
| Binary_imm elt -> analyze_shape (Use_operands [| elt; elt; Immed |])
|
| 246 |
|
|
| Narrow_imm -> analyze_shape (Use_operands [| Dreg; Qreg; Immed |])
|
| 247 |
|
|
| Long_imm -> analyze_shape (Use_operands [| Qreg; Dreg; Immed |])
|
| 248 |
|
|
|
| 249 |
|
|
(* Document a single intrinsic. *)
|
| 250 |
|
|
let describe_intrinsic first chan
|
| 251 |
|
|
(elt_ty, (_, features, shape, name, munge, _)) =
|
| 252 |
|
|
let c_arity, new_elt_ty = munge shape elt_ty in
|
| 253 |
|
|
let c_types = strings_of_arity c_arity in
|
| 254 |
|
|
Printf.fprintf chan "@itemize @bullet\n";
|
| 255 |
|
|
let item_code = if first then "@item" else "@itemx" in
|
| 256 |
|
|
Printf.fprintf chan "%s %s %s_%s (" item_code (List.hd c_types)
|
| 257 |
|
|
(intrinsic_name name) (string_of_elt elt_ty);
|
| 258 |
|
|
Printf.fprintf chan "%s)\n" (commas (fun ty -> ty) (List.tl c_types) "");
|
| 259 |
|
|
if not (List.exists (fun feature -> feature = No_op) features) then
|
| 260 |
|
|
begin
|
| 261 |
|
|
let print_one_insn name =
|
| 262 |
|
|
Printf.fprintf chan "@code{";
|
| 263 |
|
|
let no_suffix = (new_elt_ty = NoElts) in
|
| 264 |
|
|
let name_with_suffix =
|
| 265 |
|
|
if no_suffix then name
|
| 266 |
|
|
else name ^ "." ^ (string_of_elt_dots new_elt_ty)
|
| 267 |
|
|
in
|
| 268 |
|
|
let possible_operands = analyze_all_shapes features shape
|
| 269 |
|
|
analyze_shape
|
| 270 |
|
|
in
|
| 271 |
|
|
let rec print_one_possible_operand op =
|
| 272 |
|
|
Printf.fprintf chan "%s %s}" name_with_suffix op
|
| 273 |
|
|
in
|
| 274 |
|
|
(* If the intrinsic expands to multiple instructions, we assume
|
| 275 |
|
|
they are all of the same form. *)
|
| 276 |
|
|
print_one_possible_operand (List.hd possible_operands)
|
| 277 |
|
|
in
|
| 278 |
|
|
let rec print_insns names =
|
| 279 |
|
|
match names with
|
| 280 |
|
|
[] -> ()
|
| 281 |
|
|
| [name] -> print_one_insn name
|
| 282 |
|
|
| name::names -> (print_one_insn name;
|
| 283 |
|
|
Printf.fprintf chan " @emph{or} ";
|
| 284 |
|
|
print_insns names)
|
| 285 |
|
|
in
|
| 286 |
|
|
let insn_names = get_insn_names features name in
|
| 287 |
|
|
Printf.fprintf chan "@*@emph{Form of expected instruction(s):} ";
|
| 288 |
|
|
print_insns insn_names;
|
| 289 |
|
|
Printf.fprintf chan "\n"
|
| 290 |
|
|
end;
|
| 291 |
|
|
Printf.fprintf chan "@end itemize\n";
|
| 292 |
|
|
Printf.fprintf chan "\n\n"
|
| 293 |
|
|
|
| 294 |
|
|
(* Document a group of intrinsics. *)
|
| 295 |
|
|
let document_group chan (group_title, group_extractor) =
|
| 296 |
|
|
(* Extract the rows in question from the ops table and then turn them
|
| 297 |
|
|
into a list of intrinsics. *)
|
| 298 |
|
|
let intrinsics =
|
| 299 |
|
|
List.fold_left (fun got_so_far ->
|
| 300 |
|
|
fun row ->
|
| 301 |
|
|
match row with
|
| 302 |
|
|
(_, _, _, _, _, elt_tys) ->
|
| 303 |
|
|
List.fold_left (fun got_so_far' ->
|
| 304 |
|
|
fun elt_ty ->
|
| 305 |
|
|
(elt_ty, row) :: got_so_far')
|
| 306 |
|
|
got_so_far elt_tys
|
| 307 |
|
|
) [] (group_extractor ())
|
| 308 |
|
|
in
|
| 309 |
|
|
(* Emit the title for this group. *)
|
| 310 |
|
|
Printf.fprintf chan "@subsubsection %s\n\n" group_title;
|
| 311 |
|
|
(* Emit a description of each intrinsic. *)
|
| 312 |
|
|
List.iter (describe_intrinsic true chan) intrinsics;
|
| 313 |
|
|
(* Close this group. *)
|
| 314 |
|
|
Printf.fprintf chan "\n\n"
|
| 315 |
|
|
|
| 316 |
|
|
let gnu_header chan =
|
| 317 |
|
|
List.iter (fun s -> Printf.fprintf chan "%s\n" s) [
|
| 318 |
|
|
"@c Copyright (C) 2006 Free Software Foundation, Inc.";
|
| 319 |
|
|
"@c This is part of the GCC manual.";
|
| 320 |
|
|
"@c For copying conditions, see the file gcc.texi.";
|
| 321 |
|
|
"";
|
| 322 |
|
|
"@c This file is generated automatically using gcc/config/arm/neon-docgen.ml";
|
| 323 |
|
|
"@c Please do not edit manually."]
|
| 324 |
|
|
|
| 325 |
|
|
(* Program entry point. *)
|
| 326 |
|
|
let _ =
|
| 327 |
|
|
if Array.length Sys.argv <> 2 then
|
| 328 |
|
|
failwith "Usage: neon-docgen
|
| 329 |
|
|
else
|
| 330 |
|
|
let file = Sys.argv.(1) in
|
| 331 |
|
|
try
|
| 332 |
|
|
let chan = open_out file in
|
| 333 |
|
|
gnu_header chan;
|
| 334 |
|
|
List.iter (document_group chan) intrinsic_groups;
|
| 335 |
|
|
close_out chan
|
| 336 |
|
|
with Sys_error sys ->
|
| 337 |
|
|
failwith ("Could not create output file " ^ file ^ ": " ^ sys)
|