OpenCores Subversion repository: openrisc
URL: https://opencores.org/ocsvn/openrisc/openrisc/trunk
File: openrisc/trunk/gnu-dev/or1k-gcc/gcc/config/arm/neon.ml (rev 709, jeremybenn)

(* Common code for ARM NEON header file, documentation and test case
   generators.

   Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
   Contributed by CodeSourcery.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3, or (at your option) any later
   version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  *)

(* Shorthand types for vector elements.  *)
type elts = S8 | S16 | S32 | S64 | F32 | U8 | U16 | U32 | U64 | P8 | P16
          | I8 | I16 | I32 | I64 | B8 | B16 | B32 | B64 | Conv of elts * elts
          | Cast of elts * elts | NoElts

type eltclass = Signed | Unsigned | Float | Poly | Int | Bits
              | ConvClass of eltclass * eltclass | NoType

(* These vector types correspond directly to C types.  *)
type vectype = T_int8x8    | T_int8x16
             | T_int16x4   | T_int16x8
             | T_int32x2   | T_int32x4
             | T_int64x1   | T_int64x2
             | T_uint8x8   | T_uint8x16
             | T_uint16x4  | T_uint16x8
             | T_uint32x2  | T_uint32x4
             | T_uint64x1  | T_uint64x2
             | T_float32x2 | T_float32x4
             | T_poly8x8   | T_poly8x16
             | T_poly16x4  | T_poly16x8
             | T_immediate of int * int
             | T_int8      | T_int16
             | T_int32     | T_int64
             | T_uint8     | T_uint16
             | T_uint32    | T_uint64
             | T_poly8     | T_poly16
             | T_float32   | T_arrayof of int * vectype
             | T_ptrto of vectype | T_const of vectype
             | T_void      | T_intQI
             | T_intHI     | T_intSI
             | T_intDI     | T_floatSF

(* The meanings of the following are:
     TImode : "Tetra", two registers (four words).
     EImode : "hExa", three registers (six words).
     OImode : "Octa", four registers (eight words).
     CImode : "dodeCa", six registers (twelve words).
     XImode : "heXadeca", eight registers (sixteen words).
*)

type inttype = B_TImode | B_EImode | B_OImode | B_CImode | B_XImode

type shape_elt = Dreg | Qreg | Corereg | Immed | VecArray of int * shape_elt
               | PtrTo of shape_elt | CstPtrTo of shape_elt
               (* These next ones are used only in the test generator.  *)
               | Element_of_dreg        (* Used for "lane" variants.  *)
               | Element_of_qreg        (* Likewise.  *)
               | All_elements_of_dreg   (* Used for "dup" variants.  *)
               | Alternatives of shape_elt list (* Used for multiple valid operands *)

type shape_form = All of int * shape_elt
                | Long
                | Long_noreg of shape_elt
                | Wide
                | Wide_noreg of shape_elt
                | Narrow
                | Long_imm
                | Narrow_imm
                | Binary_imm of shape_elt
                | Use_operands of shape_elt array
                | By_scalar of shape_elt
                | Unary_scalar of shape_elt
                | Wide_lane
                | Wide_scalar
                | Pair_result of shape_elt
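
(* Illustrative note, not part of the original source: a shape describes the
   operand pattern of an intrinsic.  All (3, Dreg) means three operands, each
   a whole D register (e.g. the "vadd" entry below), while
   Use_operands [| Dreg; Dreg; Immed |] spells out each operand explicitly
   (e.g. the "vsri_n" entry below).  *)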
88
 
89
type arity = Arity0 of vectype
90
           | Arity1 of vectype * vectype
91
           | Arity2 of vectype * vectype * vectype
92
           | Arity3 of vectype * vectype * vectype * vectype
93
           | Arity4 of vectype * vectype * vectype * vectype * vectype
94
 
95
type vecmode = V8QI | V4HI | V2SI | V2SF | DI
96
             | V16QI | V8HI | V4SI | V4SF | V2DI
97
             | QI | HI | SI | SF
98
 
99
type opcode =
100
  (* Binary ops.  *)
101
    Vadd
102
  | Vmul
103
  | Vmla
104
  | Vmls
105
  | Vsub
106
  | Vceq
107
  | Vcge
108
  | Vcgt
109
  | Vcle
110
  | Vclt
111
  | Vcage
112
  | Vcagt
113
  | Vcale
114
  | Vcalt
115
  | Vtst
116
  | Vabd
117
  | Vaba
118
  | Vmax
119
  | Vmin
120
  | Vpadd
121
  | Vpada
122
  | Vpmax
123
  | Vpmin
124
  | Vrecps
125
  | Vrsqrts
126
  | Vshl
127
  | Vshr_n
128
  | Vshl_n
129
  | Vsra_n
130
  | Vsri
131
  | Vsli
132
  (* Logic binops.  *)
133
  | Vand
134
  | Vorr
135
  | Veor
136
  | Vbic
137
  | Vorn
138
  | Vbsl
139
  (* Ops with scalar.  *)
140
  | Vmul_lane
141
  | Vmla_lane
142
  | Vmls_lane
143
  | Vmul_n
144
  | Vmla_n
145
  | Vmls_n
146
  | Vmull_n
147
  | Vmull_lane
148
  | Vqdmull_n
149
  | Vqdmull_lane
150
  | Vqdmulh_n
151
  | Vqdmulh_lane
152
  (* Unary ops.  *)
153
  | Vabs
154
  | Vneg
155
  | Vcls
156
  | Vclz
157
  | Vcnt
158
  | Vrecpe
159
  | Vrsqrte
160
  | Vmvn
161
  (* Vector extract.  *)
162
  | Vext
163
  (* Reverse elements.  *)
164
  | Vrev64
165
  | Vrev32
166
  | Vrev16
167
  (* Transposition ops.  *)
168
  | Vtrn
169
  | Vzip
170
  | Vuzp
171
  (* Loads and stores (VLD1/VST1/VLD2...), elements and structures.  *)
172
  | Vldx of int
173
  | Vstx of int
174
  | Vldx_lane of int
175
  | Vldx_dup of int
176
  | Vstx_lane of int
177
  (* Set/extract lanes from a vector.  *)
178
  | Vget_lane
179
  | Vset_lane
180
  (* Initialize vector from bit pattern.  *)
181
  | Vcreate
182
  (* Set all lanes to same value.  *)
183
  | Vdup_n
184
  | Vmov_n  (* Is this the same?  *)
185
  (* Duplicate scalar to all lanes of vector.  *)
186
  | Vdup_lane
187
  (* Combine vectors.  *)
188
  | Vcombine
189
  (* Get quadword high/low parts.  *)
190
  | Vget_high
191
  | Vget_low
192
  (* Convert vectors.  *)
193
  | Vcvt
194
  | Vcvt_n
195
  (* Narrow/lengthen vectors.  *)
196
  | Vmovn
197
  | Vmovl
198
  (* Table lookup.  *)
199
  | Vtbl of int
200
  | Vtbx of int
201
  (* Reinterpret casts.  *)
202
  | Vreinterp
203
 
204
(* Features used for documentation, to distinguish between some instruction
205
   variants, and to signal special requirements (e.g. swapping arguments).  *)
206
 
207
type features =
208
    Halving
209
  | Rounding
210
  | Saturating
211
  | Dst_unsign
212
  | High_half
213
  | Doubling
214
  | Flipped of string  (* Builtin name to use with flipped arguments.  *)
215
  | InfoWord  (* Pass an extra word for signage/rounding etc. (always passed
216
                 for All _, Long, Wide, Narrow shape_forms.  *)
217
  | ReturnPtr  (* Pass explicit pointer to return value as first argument.  *)
218
    (* A specification as to the shape of instruction expected upon
219
       disassembly, used if it differs from the shape used to build the
220
       intrinsic prototype.  Multiple entries in the constructor's argument
221
       indicate that the intrinsic expands to more than one assembly
222
       instruction, each with a corresponding shape specified here.  *)
223
  | Disassembles_as of shape_form list
224
  | Builtin_name of string  (* Override the name of the builtin.  *)
225
    (* Override the name of the instruction.  If more than one name
226
       is specified, it means that the instruction can have any of those
227
       names.  *)
228
  | Instruction_name of string list
229
    (* Mark that the intrinsic yields no instructions, or expands to yield
230
       behavior that the test generator cannot test.  *)
231
  | No_op
232
    (* Mark that the intrinsic has constant arguments that cannot be set
233
       to the defaults (zero for pointers and one otherwise) in the test
234
       cases.  The function supplied must return the integer to be written
235
       into the testcase for the argument number (0-based) supplied to it.  *)
236
  | Const_valuator of (int -> int)
237
  | Fixed_return_reg
238
 
239
exception MixedMode of elts * elts
240
 
241
let rec elt_width = function
242
    S8 | U8 | P8 | I8 | B8 -> 8
243
  | S16 | U16 | P16 | I16 | B16 -> 16
244
  | S32 | F32 | U32 | I32 | B32 -> 32
245
  | S64 | U64 | I64 | B64 -> 64
246
  | Conv (a, b) ->
247
      let wa = elt_width a and wb = elt_width b in
248
      if wa = wb then wa else failwith "element width?"
249
  | Cast (a, b) -> raise (MixedMode (a, b))
250
  | NoElts -> failwith "No elts"
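
(* Illustrative note, not part of the original source: elt_width returns the
   bit width of an element type, e.g. elt_width S16 = 16 and
   elt_width (Conv (S32, F32)) = 32, while elt_width (Cast (S8, S16)) raises
   MixedMode (S8, S16), since a cast mixes two element types.  *)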

let rec elt_class = function
    S8 | S16 | S32 | S64 -> Signed
  | U8 | U16 | U32 | U64 -> Unsigned
  | P8 | P16 -> Poly
  | F32 -> Float
  | I8 | I16 | I32 | I64 -> Int
  | B8 | B16 | B32 | B64 -> Bits
  | Conv (a, b) | Cast (a, b) -> ConvClass (elt_class a, elt_class b)
  | NoElts -> NoType

let elt_of_class_width c w =
  match c, w with
    Signed, 8 -> S8
  | Signed, 16 -> S16
  | Signed, 32 -> S32
  | Signed, 64 -> S64
  | Float, 32 -> F32
  | Unsigned, 8 -> U8
  | Unsigned, 16 -> U16
  | Unsigned, 32 -> U32
  | Unsigned, 64 -> U64
  | Poly, 8 -> P8
  | Poly, 16 -> P16
  | Int, 8 -> I8
  | Int, 16 -> I16
  | Int, 32 -> I32
  | Int, 64 -> I64
  | Bits, 8 -> B8
  | Bits, 16 -> B16
  | Bits, 32 -> B32
  | Bits, 64 -> B64
  | _ -> failwith "Bad element type"

(* Return unsigned integer element the same width as argument.  *)
let unsigned_of_elt elt =
  elt_of_class_width Unsigned (elt_width elt)

let signed_of_elt elt =
  elt_of_class_width Signed (elt_width elt)

(* Return untyped bits element the same width as argument.  *)
let bits_of_elt elt =
  elt_of_class_width Bits (elt_width elt)
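
(* Illustrative note, not part of the original source: these helpers recombine
   a class and a width, so for example unsigned_of_elt S32 = U32,
   signed_of_elt U16 = S16 and bits_of_elt F32 = B32.  *)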
295
 
296
let non_signed_variant = function
297
    S8 -> I8
298
  | S16 -> I16
299
  | S32 -> I32
300
  | S64 -> I64
301
  | U8 -> I8
302
  | U16 -> I16
303
  | U32 -> I32
304
  | U64 -> I64
305
  | x -> x
306
 
307
let poly_unsigned_variant v =
308
  let elclass = match elt_class v with
309
    Poly -> Unsigned
310
  | x -> x in
311
  elt_of_class_width elclass (elt_width v)
312
 
313
let widen_elt elt =
314
  let w = elt_width elt
315
  and c = elt_class elt in
316
  elt_of_class_width c (w * 2)
317
 
318
let narrow_elt elt =
319
  let w = elt_width elt
320
  and c = elt_class elt in
321
  elt_of_class_width c (w / 2)
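
(* Illustrative note, not part of the original source: widen_elt and narrow_elt
   double or halve the element width while keeping its class, so
   widen_elt S16 = S32 and narrow_elt U64 = U32; widen_elt S64 fails with
   "Bad element type" because there is no 128-bit element.  *)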

(* If we're trying to find a mode from a "Use_operands" instruction, use the
   last vector operand as the dominant mode used to invoke the correct builtin.
   We must stick to this rule in neon.md.  *)
let find_key_operand operands =
  let rec scan opno =
    match operands.(opno) with
      Qreg -> Qreg
    | Dreg -> Dreg
    | VecArray (_, Qreg) -> Qreg
    | VecArray (_, Dreg) -> Dreg
    | _ -> scan (opno-1)
  in
    scan ((Array.length operands) - 1)
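
(* Illustrative note, not part of the original source: the scan walks backwards
   from the last operand, so for the vsri_n shape
   Use_operands [| Dreg; Dreg; Immed |] the Immed is skipped and the result is
   Dreg, the rightmost vector operand.  *)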

let rec mode_of_elt elt shape =
  let flt = match elt_class elt with
    Float | ConvClass(_, Float) -> true | _ -> false in
  let idx =
    match elt_width elt with
      8 -> 0 | 16 -> 1 | 32 -> 2 | 64 -> 3
    | _ -> failwith "Bad element width"
  in match shape with
    All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg
  | Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg ->
      [| V8QI; V4HI; if flt then V2SF else V2SI; DI |].(idx)
  | All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg
  | Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg ->
      [| V16QI; V8HI; if flt then V4SF else V4SI; V2DI |].(idx)
  | All (_, (Corereg | PtrTo _ | CstPtrTo _)) ->
      [| QI; HI; if flt then SF else SI; DI |].(idx)
  | Long | Wide | Wide_lane | Wide_scalar
  | Long_imm ->
      [| V8QI; V4HI; V2SI; DI |].(idx)
  | Narrow | Narrow_imm -> [| V16QI; V8HI; V4SI; V2DI |].(idx)
  | Use_operands ops -> mode_of_elt elt (All (0, (find_key_operand ops)))
  | _ -> failwith "invalid shape"
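
(* Illustrative note, not part of the original source: the mode is chosen from
   the element width and the register class of the shape, e.g.
   mode_of_elt F32 (All (3, Dreg)) = V2SF and
   mode_of_elt U8 (All (3, Qreg)) = V16QI.  *)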

(* Modify an element type dependent on the shape of the instruction and the
   operand number.  *)

let shapemap shape no =
  let ident = fun x -> x in
  match shape with
    All _ | Use_operands _ | By_scalar _ | Pair_result _ | Unary_scalar _
  | Binary_imm _ -> ident
  | Long | Long_noreg _ | Wide_scalar | Long_imm ->
      [| widen_elt; ident; ident |].(no)
  | Wide | Wide_noreg _ -> [| widen_elt; widen_elt; ident |].(no)
  | Wide_lane -> [| widen_elt; ident; ident; ident |].(no)
  | Narrow | Narrow_imm -> [| narrow_elt; ident; ident |].(no)

(* Register type (D/Q) of an operand, based on shape and operand number.  *)

let regmap shape no =
  match shape with
    All (_, reg) | Long_noreg reg | Wide_noreg reg -> reg
  | Long -> [| Qreg; Dreg; Dreg |].(no)
  | Wide -> [| Qreg; Qreg; Dreg |].(no)
  | Narrow -> [| Dreg; Qreg; Qreg |].(no)
  | Wide_lane -> [| Qreg; Dreg; Dreg; Immed |].(no)
  | Wide_scalar -> [| Qreg; Dreg; Corereg |].(no)
  | By_scalar reg -> [| reg; reg; Dreg; Immed |].(no)
  | Unary_scalar reg -> [| reg; Dreg; Immed |].(no)
  | Pair_result reg -> [| VecArray (2, reg); reg; reg |].(no)
  | Binary_imm reg -> [| reg; reg; Immed |].(no)
  | Long_imm -> [| Qreg; Dreg; Immed |].(no)
  | Narrow_imm -> [| Dreg; Qreg; Immed |].(no)
  | Use_operands these -> these.(no)
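
(* Illustrative note, not part of the original source: operand 0 is the result.
   For the Long shape (used by vaddl below) regmap gives Qreg for operand 0 and
   Dreg for operands 1 and 2, while shapemap widens only operand 0, so a long
   operation reads two D registers and writes a Q register of doubled element
   width.  *)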

let type_for_elt shape elt no =
  let elt = (shapemap shape no) elt in
  let reg = regmap shape no in
  let rec type_for_reg_elt reg elt =
    match reg with
      Dreg ->
        begin match elt with
          S8 -> T_int8x8
        | S16 -> T_int16x4
        | S32 -> T_int32x2
        | S64 -> T_int64x1
        | U8 -> T_uint8x8
        | U16 -> T_uint16x4
        | U32 -> T_uint32x2
        | U64 -> T_uint64x1
        | F32 -> T_float32x2
        | P8 -> T_poly8x8
        | P16 -> T_poly16x4
        | _ -> failwith "Bad elt type"
        end
    | Qreg ->
        begin match elt with
          S8 -> T_int8x16
        | S16 -> T_int16x8
        | S32 -> T_int32x4
        | S64 -> T_int64x2
        | U8 -> T_uint8x16
        | U16 -> T_uint16x8
        | U32 -> T_uint32x4
        | U64 -> T_uint64x2
        | F32 -> T_float32x4
        | P8 -> T_poly8x16
        | P16 -> T_poly16x8
        | _ -> failwith "Bad elt type"
        end
    | Corereg ->
        begin match elt with
          S8 -> T_int8
        | S16 -> T_int16
        | S32 -> T_int32
        | S64 -> T_int64
        | U8 -> T_uint8
        | U16 -> T_uint16
        | U32 -> T_uint32
        | U64 -> T_uint64
        | P8 -> T_poly8
        | P16 -> T_poly16
        | F32 -> T_float32
        | _ -> failwith "Bad elt type"
        end
    | Immed ->
        T_immediate (0, 0)
    | VecArray (num, sub) ->
        T_arrayof (num, type_for_reg_elt sub elt)
    | PtrTo x ->
        T_ptrto (type_for_reg_elt x elt)
    | CstPtrTo x ->
        T_ptrto (T_const (type_for_reg_elt x elt))
    (* Anything else is solely for the use of the test generator.  *)
    | _ -> assert false
  in
    type_for_reg_elt reg elt
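
(* Illustrative note, not part of the original source: combining the two maps,
   type_for_elt Long S8 0 = T_int16x8 (widened element in a Q register) and
   type_for_elt (All (3, Dreg)) F32 1 = T_float32x2.  *)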

(* Return size of a vector type, in bits.  *)
let vectype_size = function
    T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1
  | T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1
  | T_float32x2 | T_poly8x8 | T_poly16x4 -> 64
  | T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2
  | T_uint8x16 | T_uint16x8  | T_uint32x4  | T_uint64x2
  | T_float32x4 | T_poly8x16 | T_poly16x8 -> 128
  | _ -> raise Not_found

let inttype_for_array num elttype =
  let eltsize = vectype_size elttype in
  let numwords = (num * eltsize) / 32 in
  match numwords with
    4 -> B_TImode
  | 6 -> B_EImode
  | 8 -> B_OImode
  | 12 -> B_CImode
  | 16 -> B_XImode
  | _ -> failwith ("no int type for size " ^ string_of_int numwords)
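
(* Illustrative note, not part of the original source: the word counts match
   the TImode/EImode/... comment above, e.g. inttype_for_array 2 T_int32x4
   gives (2 * 128) / 32 = 8 words, i.e. B_OImode, and
   inttype_for_array 3 T_int16x4 gives 6 words, i.e. B_EImode.  *)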

(* These functions return pairs of (internal, external) types, where "internal"
   types are those seen by GCC, and "external" are those seen by the assembler.
   These types aren't necessarily the same, since the intrinsics can munge more
   than one C type into each assembler opcode.  *)

let make_sign_invariant func shape elt =
  let arity, elt' = func shape elt in
  arity, non_signed_variant elt'

(* Don't restrict any types.  *)

let elts_same make_arity shape elt =
  let vtype = type_for_elt shape elt in
  make_arity vtype, elt

(* As sign_invar_*, but when sign matters.  *)
let elts_same_io_lane =
  elts_same (fun vtype -> Arity4 (vtype 0, vtype 0, vtype 1, vtype 2, vtype 3))

let elts_same_io =
  elts_same (fun vtype -> Arity3 (vtype 0, vtype 0, vtype 1, vtype 2))

let elts_same_2_lane =
  elts_same (fun vtype -> Arity3 (vtype 0, vtype 1, vtype 2, vtype 3))

let elts_same_3 = elts_same_2_lane

let elts_same_2 =
  elts_same (fun vtype -> Arity2 (vtype 0, vtype 1, vtype 2))

let elts_same_1 =
  elts_same (fun vtype -> Arity1 (vtype 0, vtype 1))

(* Use for signed/unsigned invariant operations (i.e. where the operation
   doesn't depend on the sign of the data).  *)

let sign_invar_io_lane = make_sign_invariant elts_same_io_lane
let sign_invar_io = make_sign_invariant elts_same_io
let sign_invar_2_lane = make_sign_invariant elts_same_2_lane
let sign_invar_2 = make_sign_invariant elts_same_2
let sign_invar_1 = make_sign_invariant elts_same_1
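
(* Illustrative note, not part of the original source: for the "vadd" entry
   below, sign_invar_2 (All (3, Dreg)) S8 first builds
   Arity2 (T_int8x8, T_int8x8, T_int8x8) via elts_same_2, then maps the element
   to I8 (U8 maps to I8 as well), reflecting that the operation ignores
   sign.  *)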

(* Sign-sensitive comparison.  *)

let cmp_sign_matters shape elt =
  let vtype = type_for_elt shape elt
  and rtype = type_for_elt shape (unsigned_of_elt elt) 0 in
  Arity2 (rtype, vtype 1, vtype 2), elt

(* Signed/unsigned invariant comparison.  *)

let cmp_sign_invar shape elt =
  let shape', elt' = cmp_sign_matters shape elt in
  let elt'' =
    match non_signed_variant elt' with
      P8 -> I8
    | x -> x
  in
    shape', elt''

(* Comparison (VTST) where only the element width matters.  *)

let cmp_bits shape elt =
  let vtype = type_for_elt shape elt
  and rtype = type_for_elt shape (unsigned_of_elt elt) 0
  and bits_only = bits_of_elt elt in
  Arity2 (rtype, vtype 1, vtype 2), bits_only

let reg_shift shape elt =
  let vtype = type_for_elt shape elt
  and op2type = type_for_elt shape (signed_of_elt elt) 2 in
  Arity2 (vtype 0, vtype 1, op2type), elt

(* Genericised constant-shift type-generating function.  *)

let const_shift mkimm ?arity ?result shape elt =
  let op2type = (shapemap shape 2) elt in
  let op2width = elt_width op2type in
  let op2 = mkimm op2width
  and op1 = type_for_elt shape elt 1
  and r_elt =
    match result with
      None -> elt
    | Some restriction -> restriction elt in
  let rtype = type_for_elt shape r_elt 0 in
  match arity with
    None -> Arity2 (rtype, op1, op2), elt
  | Some mkarity -> mkarity rtype op1 op2, elt

(* Use for immediate right-shifts.  *)

let shift_right shape elt =
  const_shift (fun imm -> T_immediate (1, imm)) shape elt
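
(* Illustrative note, not part of the original source: for the "vshr_n" entry
   below, shift_right (Binary_imm Dreg) S16 builds
   Arity2 (T_int16x4, T_int16x4, T_immediate (1, 16)); the two integers of
   T_immediate presumably give the lower and upper bounds of the constant
   shift count.  *)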
569
 
570
let shift_right_acc shape elt =
571
  const_shift (fun imm -> T_immediate (1, imm))
572
    ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt
573
 
574
(* Use for immediate right-shifts when the operation doesn't care about
575
   signedness.  *)
576
 
577
let shift_right_sign_invar =
578
  make_sign_invariant shift_right
579
 
580
(* Immediate right-shift; result is unsigned even when operand is signed.  *)
581
 
582
let shift_right_to_uns shape elt =
583
  const_shift (fun imm -> T_immediate (1, imm)) ~result:unsigned_of_elt
584
    shape elt
585
 
586
(* Immediate left-shift.  *)
587
 
588
let shift_left shape elt =
589
  const_shift (fun imm -> T_immediate (0, imm - 1)) shape elt
590
 
591
(* Immediate left-shift, unsigned result.  *)
592
 
593
let shift_left_to_uns shape elt =
594
  const_shift (fun imm -> T_immediate (0, imm - 1)) ~result:unsigned_of_elt
595
    shape elt
596
 
597
(* Immediate left-shift, don't care about signs.  *)
598
 
599
let shift_left_sign_invar =
600
  make_sign_invariant shift_left
601
 
602
(* Shift left/right and insert: only element size matters.  *)
603
 
604
let shift_insert shape elt =
605
  let arity, elt =
606
    const_shift (fun imm -> T_immediate (1, imm))
607
    ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt in
608
  arity, bits_of_elt elt
609
 
610
(* Get/set lane.  *)
611
 
612
let get_lane shape elt =
613
  let vtype = type_for_elt shape elt in
614
  Arity2 (vtype 0, vtype 1, vtype 2),
615
    (match elt with P8 -> U8 | P16 -> U16 | S32 | U32 | F32 -> B32 | x -> x)
616
 
617
let set_lane shape elt =
618
  let vtype = type_for_elt shape elt in
619
  Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), bits_of_elt elt
620
 
621
let set_lane_notype shape elt =
622
  let vtype = type_for_elt shape elt in
623
  Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), NoElts
624
 
625
let create_vector shape elt =
626
  let vtype = type_for_elt shape U64 1
627
  and rtype = type_for_elt shape elt 0 in
628
  Arity1 (rtype, vtype), elt
629
 
630
let conv make_arity shape elt =
631
  let edest, esrc = match elt with
632
    Conv (edest, esrc) | Cast (edest, esrc) -> edest, esrc
633
  | _ -> failwith "Non-conversion element in conversion" in
634
  let vtype = type_for_elt shape esrc
635
  and rtype = type_for_elt shape edest 0 in
636
  make_arity rtype vtype, elt
637
 
638
let conv_1 = conv (fun rtype vtype -> Arity1 (rtype, vtype 1))
639
let conv_2 = conv (fun rtype vtype -> Arity2 (rtype, vtype 1, vtype 2))
640
 
641
(* Operation has an unsigned result even if operands are signed.  *)
642
 
643
let dst_unsign make_arity shape elt =
644
  let vtype = type_for_elt shape elt
645
  and rtype = type_for_elt shape (unsigned_of_elt elt) 0 in
646
  make_arity rtype vtype, elt
647
 
648
let dst_unsign_1 = dst_unsign (fun rtype vtype -> Arity1 (rtype, vtype 1))
649
 
650
let make_bits_only func shape elt =
651
  let arity, elt' = func shape elt in
652
  arity, bits_of_elt elt'
653
 
654
(* Extend operation.  *)
655
 
656
let extend shape elt =
657
  let vtype = type_for_elt shape elt in
658
  Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), bits_of_elt elt
659
 
660
(* Table look-up operations. Operand 2 is signed/unsigned for signed/unsigned
661
   integer ops respectively, or unsigned for polynomial ops.  *)
662
 
663
let table mkarity shape elt =
664
  let vtype = type_for_elt shape elt in
665
  let op2 = type_for_elt shape (poly_unsigned_variant elt) 2 in
666
  mkarity vtype op2, bits_of_elt elt
667
 
668
let table_2 = table (fun vtype op2 -> Arity2 (vtype 0, vtype 1, op2))
669
let table_io = table (fun vtype op2 -> Arity3 (vtype 0, vtype 0, vtype 1, op2))
670
 
671
(* Operations where only bits matter.  *)
672
 
673
let bits_1 = make_bits_only elts_same_1
674
let bits_2 = make_bits_only elts_same_2
675
let bits_3 = make_bits_only elts_same_3
676
 
677
(* Store insns.  *)
678
let store_1 shape elt =
679
  let vtype = type_for_elt shape elt in
680
  Arity2 (T_void, vtype 0, vtype 1), bits_of_elt elt
681
 
682
let store_3 shape elt =
683
  let vtype = type_for_elt shape elt in
684
  Arity3 (T_void, vtype 0, vtype 1, vtype 2), bits_of_elt elt
685
 
686
let make_notype func shape elt =
687
  let arity, _ = func shape elt in
688
  arity, NoElts
689
 
690
let notype_1 = make_notype elts_same_1
691
let notype_2 = make_notype elts_same_2
692
let notype_3 = make_notype elts_same_3
693
 
694
(* Bit-select operations (first operand is unsigned int).  *)
695
 
696
let bit_select shape elt =
697
  let vtype = type_for_elt shape elt
698
  and itype = type_for_elt shape (unsigned_of_elt elt) in
699
  Arity3 (vtype 0, itype 1, vtype 2, vtype 3), NoElts
700
 
701
(* Common lists of supported element types.  *)
702
 
703
let s_8_32 = [S8; S16; S32]
704
let u_8_32 = [U8; U16; U32]
705
let su_8_32 = [S8; S16; S32; U8; U16; U32]
706
let su_8_64 = S64 :: U64 :: su_8_32
707
let su_16_64 = [S16; S32; S64; U16; U32; U64]
708
let pf_su_8_32 = P8 :: P16 :: F32 :: su_8_32
709
let pf_su_8_64 = P8 :: P16 :: F32 :: su_8_64
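
(* Illustrative note, not part of the original source: each entry of the ops
   list below is a 6-tuple of opcode, feature list, instruction shape, base
   intrinsic name, type-generating function and supported element types.  For
   example the first entry, Vadd with shape All (3, Dreg) and elements
   F32 :: su_8_32, covers vadd for [F32; S8; S16; S32; U8; U16; U32].  *)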
710
 
711
let ops =
712
  [
713
    (* Addition.  *)
714
    Vadd, [], All (3, Dreg), "vadd", sign_invar_2, F32 :: su_8_32;
715
    Vadd, [No_op], All (3, Dreg), "vadd", sign_invar_2, [S64; U64];
716
    Vadd, [], All (3, Qreg), "vaddQ", sign_invar_2, F32 :: su_8_64;
717
    Vadd, [], Long, "vaddl", elts_same_2, su_8_32;
718
    Vadd, [], Wide, "vaddw", elts_same_2, su_8_32;
719
    Vadd, [Halving], All (3, Dreg), "vhadd", elts_same_2, su_8_32;
720
    Vadd, [Halving], All (3, Qreg), "vhaddQ", elts_same_2, su_8_32;
721
    Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving],
722
      All (3, Dreg), "vRhadd", elts_same_2, su_8_32;
723
    Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving],
724
      All (3, Qreg), "vRhaddQ", elts_same_2, su_8_32;
725
    Vadd, [Saturating], All (3, Dreg), "vqadd", elts_same_2, su_8_64;
726
    Vadd, [Saturating], All (3, Qreg), "vqaddQ", elts_same_2, su_8_64;
727
    Vadd, [High_half], Narrow, "vaddhn", sign_invar_2, su_16_64;
728
    Vadd, [Instruction_name ["vraddhn"]; Rounding; High_half],
729
      Narrow, "vRaddhn", sign_invar_2, su_16_64;
730
 
731
    (* Multiplication.  *)
732
    Vmul, [], All (3, Dreg), "vmul", sign_invar_2, P8 :: F32 :: su_8_32;
733
    Vmul, [], All (3, Qreg), "vmulQ", sign_invar_2, P8 :: F32 :: su_8_32;
734
    Vmul, [Saturating; Doubling; High_half], All (3, Dreg), "vqdmulh",
735
      elts_same_2, [S16; S32];
736
    Vmul, [Saturating; Doubling; High_half], All (3, Qreg), "vqdmulhQ",
737
      elts_same_2, [S16; S32];
738
    Vmul,
739
      [Saturating; Rounding; Doubling; High_half;
740
       Instruction_name ["vqrdmulh"]],
741
      All (3, Dreg), "vqRdmulh",
742
      elts_same_2, [S16; S32];
743
    Vmul,
744
      [Saturating; Rounding; Doubling; High_half;
745
       Instruction_name ["vqrdmulh"]],
746
      All (3, Qreg), "vqRdmulhQ",
747
      elts_same_2, [S16; S32];
748
    Vmul, [], Long, "vmull", elts_same_2, P8 :: su_8_32;
749
    Vmul, [Saturating; Doubling], Long, "vqdmull", elts_same_2, [S16; S32];
750
 
751
    (* Multiply-accumulate. *)
752
    Vmla, [], All (3, Dreg), "vmla", sign_invar_io, F32 :: su_8_32;
753
    Vmla, [], All (3, Qreg), "vmlaQ", sign_invar_io, F32 :: su_8_32;
754
    Vmla, [], Long, "vmlal", elts_same_io, su_8_32;
755
    Vmla, [Saturating; Doubling], Long, "vqdmlal", elts_same_io, [S16; S32];
756
 
757
    (* Multiply-subtract.  *)
758
    Vmls, [], All (3, Dreg), "vmls", sign_invar_io, F32 :: su_8_32;
759
    Vmls, [], All (3, Qreg), "vmlsQ", sign_invar_io, F32 :: su_8_32;
760
    Vmls, [], Long, "vmlsl", elts_same_io, su_8_32;
761
    Vmls, [Saturating; Doubling], Long, "vqdmlsl", elts_same_io, [S16; S32];
762
 
763
    (* Subtraction.  *)
764
    Vsub, [], All (3, Dreg), "vsub", sign_invar_2, F32 :: su_8_32;
765
    Vsub, [No_op], All (3, Dreg), "vsub", sign_invar_2,  [S64; U64];
766
    Vsub, [], All (3, Qreg), "vsubQ", sign_invar_2, F32 :: su_8_64;
767
    Vsub, [], Long, "vsubl", elts_same_2, su_8_32;
768
    Vsub, [], Wide, "vsubw", elts_same_2, su_8_32;
769
    Vsub, [Halving], All (3, Dreg), "vhsub", elts_same_2, su_8_32;
770
    Vsub, [Halving], All (3, Qreg), "vhsubQ", elts_same_2, su_8_32;
771
    Vsub, [Saturating], All (3, Dreg), "vqsub", elts_same_2, su_8_64;
772
    Vsub, [Saturating], All (3, Qreg), "vqsubQ", elts_same_2, su_8_64;
773
    Vsub, [High_half], Narrow, "vsubhn", sign_invar_2, su_16_64;
774
    Vsub, [Instruction_name ["vrsubhn"]; Rounding; High_half],
775
      Narrow, "vRsubhn", sign_invar_2, su_16_64;
776
 
777
    (* Comparison, equal.  *)
778
    Vceq, [], All (3, Dreg), "vceq", cmp_sign_invar, P8 :: F32 :: su_8_32;
779
    Vceq, [], All (3, Qreg), "vceqQ", cmp_sign_invar, P8 :: F32 :: su_8_32;
780
 
781
    (* Comparison, greater-than or equal.  *)
782
    Vcge, [], All (3, Dreg), "vcge", cmp_sign_matters, F32 :: s_8_32;
783
    Vcge, [Builtin_name "vcgeu"], All (3, Dreg), "vcge", cmp_sign_matters, u_8_32;
784
    Vcge, [], All (3, Qreg), "vcgeQ", cmp_sign_matters, F32 :: s_8_32;
785
    Vcge, [Builtin_name "vcgeu"], All (3, Qreg), "vcgeQ", cmp_sign_matters, u_8_32;
786
 
787
    (* Comparison, less-than or equal.  *)
788
    Vcle, [Flipped "vcge"], All (3, Dreg), "vcle", cmp_sign_matters,
789
      F32 :: s_8_32;
790
    Vcle, [Flipped "vcgeu"], All (3, Dreg), "vcle", cmp_sign_matters,
791
      u_8_32;
792
    Vcle, [Instruction_name ["vcge"]; Flipped "vcgeQ"],
793
      All (3, Qreg), "vcleQ", cmp_sign_matters,
794
      F32 :: s_8_32;
795
    Vcle, [Instruction_name ["vcge"]; Flipped "vcgeuQ"],
796
      All (3, Qreg), "vcleQ", cmp_sign_matters,
797
      u_8_32;
798
 
799
    (* Comparison, greater-than.  *)
800
    Vcgt, [], All (3, Dreg), "vcgt", cmp_sign_matters, F32 :: s_8_32;
801
    Vcgt, [Builtin_name "vcgtu"], All (3, Dreg), "vcgt", cmp_sign_matters, u_8_32;
802
    Vcgt, [], All (3, Qreg), "vcgtQ", cmp_sign_matters, F32 :: s_8_32;
803
    Vcgt, [Builtin_name "vcgtu"], All (3, Qreg), "vcgtQ", cmp_sign_matters, u_8_32;
804
 
805
    (* Comparison, less-than.  *)
806
    Vclt, [Flipped "vcgt"], All (3, Dreg), "vclt", cmp_sign_matters,
807
      F32 :: s_8_32;
808
    Vclt, [Flipped "vcgtu"], All (3, Dreg), "vclt", cmp_sign_matters,
809
      u_8_32;
810
    Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtQ"],
811
      All (3, Qreg), "vcltQ", cmp_sign_matters,
812
      F32 :: s_8_32;
813
    Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtuQ"],
814
      All (3, Qreg), "vcltQ", cmp_sign_matters,
815
      u_8_32;
816
 
817
    (* Compare absolute greater-than or equal.  *)
818
    Vcage, [Instruction_name ["vacge"]],
819
      All (3, Dreg), "vcage", cmp_sign_matters, [F32];
820
    Vcage, [Instruction_name ["vacge"]],
821
      All (3, Qreg), "vcageQ", cmp_sign_matters, [F32];
822
 
823
    (* Compare absolute less-than or equal.  *)
824
    Vcale, [Instruction_name ["vacge"]; Flipped "vcage"],
825
      All (3, Dreg), "vcale", cmp_sign_matters, [F32];
826
    Vcale, [Instruction_name ["vacge"]; Flipped "vcageQ"],
827
      All (3, Qreg), "vcaleQ", cmp_sign_matters, [F32];
828
 
829
    (* Compare absolute greater-than or equal.  *)
830
    Vcagt, [Instruction_name ["vacgt"]],
831
      All (3, Dreg), "vcagt", cmp_sign_matters, [F32];
832
    Vcagt, [Instruction_name ["vacgt"]],
833
      All (3, Qreg), "vcagtQ", cmp_sign_matters, [F32];
834
 
835
    (* Compare absolute less-than or equal.  *)
836
    Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagt"],
837
      All (3, Dreg), "vcalt", cmp_sign_matters, [F32];
838
    Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagtQ"],
839
      All (3, Qreg), "vcaltQ", cmp_sign_matters, [F32];
840
 
841
    (* Test bits.  *)
842
    Vtst, [], All (3, Dreg), "vtst", cmp_bits, P8 :: su_8_32;
843
    Vtst, [], All (3, Qreg), "vtstQ", cmp_bits, P8 :: su_8_32;
844
 
845
    (* Absolute difference.  *)
846
    Vabd, [], All (3, Dreg), "vabd", elts_same_2, F32 :: su_8_32;
847
    Vabd, [], All (3, Qreg), "vabdQ", elts_same_2, F32 :: su_8_32;
848
    Vabd, [], Long, "vabdl", elts_same_2, su_8_32;
849
 
850
    (* Absolute difference and accumulate.  *)
851
    Vaba, [], All (3, Dreg), "vaba", elts_same_io, su_8_32;
852
    Vaba, [], All (3, Qreg), "vabaQ", elts_same_io, su_8_32;
853
    Vaba, [], Long, "vabal", elts_same_io, su_8_32;
854
 
855
    (* Max.  *)
856
    Vmax, [], All (3, Dreg), "vmax", elts_same_2, F32 :: su_8_32;
857
    Vmax, [], All (3, Qreg), "vmaxQ", elts_same_2, F32 :: su_8_32;
858
 
859
    (* Min.  *)
860
    Vmin, [], All (3, Dreg), "vmin", elts_same_2, F32 :: su_8_32;
861
    Vmin, [], All (3, Qreg), "vminQ", elts_same_2, F32 :: su_8_32;
862
 
863
    (* Pairwise add.  *)
864
    Vpadd, [], All (3, Dreg), "vpadd", sign_invar_2, F32 :: su_8_32;
865
    Vpadd, [], Long_noreg Dreg, "vpaddl", elts_same_1, su_8_32;
866
    Vpadd, [], Long_noreg Qreg, "vpaddlQ", elts_same_1, su_8_32;
867
 
868
    (* Pairwise add, widen and accumulate.  *)
869
    Vpada, [], Wide_noreg Dreg, "vpadal", elts_same_2, su_8_32;
870
    Vpada, [], Wide_noreg Qreg, "vpadalQ", elts_same_2, su_8_32;
871
 
872
    (* Folding maximum, minimum.  *)
873
    Vpmax, [], All (3, Dreg), "vpmax", elts_same_2, F32 :: su_8_32;
874
    Vpmin, [], All (3, Dreg), "vpmin", elts_same_2, F32 :: su_8_32;
875
 
876
    (* Reciprocal step.  *)
877
    Vrecps, [], All (3, Dreg), "vrecps", elts_same_2, [F32];
878
    Vrecps, [], All (3, Qreg), "vrecpsQ", elts_same_2, [F32];
879
    Vrsqrts, [], All (3, Dreg), "vrsqrts", elts_same_2, [F32];
880
    Vrsqrts, [], All (3, Qreg), "vrsqrtsQ", elts_same_2, [F32];
881
 
882
    (* Vector shift left.  *)
    Vshl, [], All (3, Dreg), "vshl", reg_shift, su_8_64;
    Vshl, [], All (3, Qreg), "vshlQ", reg_shift, su_8_64;
    Vshl, [Instruction_name ["vrshl"]; Rounding],
      All (3, Dreg), "vRshl", reg_shift, su_8_64;
    Vshl, [Instruction_name ["vrshl"]; Rounding],
      All (3, Qreg), "vRshlQ", reg_shift, su_8_64;
    Vshl, [Saturating], All (3, Dreg), "vqshl", reg_shift, su_8_64;
    Vshl, [Saturating], All (3, Qreg), "vqshlQ", reg_shift, su_8_64;
    Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding],
      All (3, Dreg), "vqRshl", reg_shift, su_8_64;
    Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding],
      All (3, Qreg), "vqRshlQ", reg_shift, su_8_64;

    (* Vector shift right by constant.  *)
    Vshr_n, [], Binary_imm Dreg, "vshr_n", shift_right, su_8_64;
    Vshr_n, [], Binary_imm Qreg, "vshrQ_n", shift_right, su_8_64;
    Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Dreg,
      "vRshr_n", shift_right, su_8_64;
    Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Qreg,
      "vRshrQ_n", shift_right, su_8_64;
    Vshr_n, [], Narrow_imm, "vshrn_n", shift_right_sign_invar, su_16_64;
    Vshr_n, [Instruction_name ["vrshrn"]; Rounding], Narrow_imm, "vRshrn_n",
      shift_right_sign_invar, su_16_64;
    Vshr_n, [Saturating], Narrow_imm, "vqshrn_n", shift_right, su_16_64;
    Vshr_n, [Instruction_name ["vqrshrn"]; Saturating; Rounding], Narrow_imm,
      "vqRshrn_n", shift_right, su_16_64;
    Vshr_n, [Saturating; Dst_unsign], Narrow_imm, "vqshrun_n",
      shift_right_to_uns, [S16; S32; S64];
    Vshr_n, [Instruction_name ["vqrshrun"]; Saturating; Dst_unsign; Rounding],
      Narrow_imm, "vqRshrun_n", shift_right_to_uns, [S16; S32; S64];

    (* Vector shift left by constant.  *)
    Vshl_n, [], Binary_imm Dreg, "vshl_n", shift_left_sign_invar, su_8_64;
    Vshl_n, [], Binary_imm Qreg, "vshlQ_n", shift_left_sign_invar, su_8_64;
    Vshl_n, [Saturating], Binary_imm Dreg, "vqshl_n", shift_left, su_8_64;
    Vshl_n, [Saturating], Binary_imm Qreg, "vqshlQ_n", shift_left, su_8_64;
    Vshl_n, [Saturating; Dst_unsign], Binary_imm Dreg, "vqshlu_n",
      shift_left_to_uns, [S8; S16; S32; S64];
    Vshl_n, [Saturating; Dst_unsign], Binary_imm Qreg, "vqshluQ_n",
      shift_left_to_uns, [S8; S16; S32; S64];
    Vshl_n, [], Long_imm, "vshll_n", shift_left, su_8_32;

    (* Vector shift right by constant and accumulate.  *)
    Vsra_n, [], Binary_imm Dreg, "vsra_n", shift_right_acc, su_8_64;
    Vsra_n, [], Binary_imm Qreg, "vsraQ_n", shift_right_acc, su_8_64;
    Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Dreg,
      "vRsra_n", shift_right_acc, su_8_64;
    Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Qreg,
      "vRsraQ_n", shift_right_acc, su_8_64;

    (* Vector shift right and insert.  *)
    Vsri, [], Use_operands [| Dreg; Dreg; Immed |], "vsri_n", shift_insert,
      P8 :: P16 :: su_8_64;
    Vsri, [], Use_operands [| Qreg; Qreg; Immed |], "vsriQ_n", shift_insert,
      P8 :: P16 :: su_8_64;

    (* Vector shift left and insert.  *)
    Vsli, [], Use_operands [| Dreg; Dreg; Immed |], "vsli_n", shift_insert,
      P8 :: P16 :: su_8_64;
    Vsli, [], Use_operands [| Qreg; Qreg; Immed |], "vsliQ_n", shift_insert,
      P8 :: P16 :: su_8_64;

    (* Absolute value.  *)
    Vabs, [], All (2, Dreg), "vabs", elts_same_1, [S8; S16; S32; F32];
    Vabs, [], All (2, Qreg), "vabsQ", elts_same_1, [S8; S16; S32; F32];
    Vabs, [Saturating], All (2, Dreg), "vqabs", elts_same_1, [S8; S16; S32];
    Vabs, [Saturating], All (2, Qreg), "vqabsQ", elts_same_1, [S8; S16; S32];

    (* Negate.  *)
    Vneg, [], All (2, Dreg), "vneg", elts_same_1, [S8; S16; S32; F32];
    Vneg, [], All (2, Qreg), "vnegQ", elts_same_1, [S8; S16; S32; F32];
    Vneg, [Saturating], All (2, Dreg), "vqneg", elts_same_1, [S8; S16; S32];
    Vneg, [Saturating], All (2, Qreg), "vqnegQ", elts_same_1, [S8; S16; S32];

    (* Bitwise not.  *)
    Vmvn, [], All (2, Dreg), "vmvn", notype_1, P8 :: su_8_32;
    Vmvn, [], All (2, Qreg), "vmvnQ", notype_1, P8 :: su_8_32;

    (* Count leading sign bits.  *)
    Vcls, [], All (2, Dreg), "vcls", elts_same_1, [S8; S16; S32];
    Vcls, [], All (2, Qreg), "vclsQ", elts_same_1, [S8; S16; S32];

    (* Count leading zeros.  *)
    Vclz, [], All (2, Dreg), "vclz", sign_invar_1, su_8_32;
    Vclz, [], All (2, Qreg), "vclzQ", sign_invar_1, su_8_32;

    (* Count number of set bits.  *)
    Vcnt, [], All (2, Dreg), "vcnt", bits_1, [P8; S8; U8];
    Vcnt, [], All (2, Qreg), "vcntQ", bits_1, [P8; S8; U8];

    (* Reciprocal estimate.  *)
    Vrecpe, [], All (2, Dreg), "vrecpe", elts_same_1, [U32; F32];
    Vrecpe, [], All (2, Qreg), "vrecpeQ", elts_same_1, [U32; F32];

    (* Reciprocal square-root estimate.  *)
    Vrsqrte, [], All (2, Dreg), "vrsqrte", elts_same_1, [U32; F32];
    Vrsqrte, [], All (2, Qreg), "vrsqrteQ", elts_same_1, [U32; F32];

    (* Get lanes from a vector.  *)
    Vget_lane,
      [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]];
       Instruction_name ["vmov"]],
      Use_operands [| Corereg; Dreg; Immed |],
      "vget_lane", get_lane, pf_su_8_32;
    Vget_lane,
      [No_op;
       InfoWord;
       Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]];
       Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
      Use_operands [| Corereg; Dreg; Immed |],
      "vget_lane", notype_2, [S64; U64];
    Vget_lane,
      [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]];
       Instruction_name ["vmov"]],
      Use_operands [| Corereg; Qreg; Immed |],
      "vgetQ_lane", get_lane, pf_su_8_32;
    Vget_lane,
      [InfoWord;
       Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]];
       Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
      Use_operands [| Corereg; Qreg; Immed |],
      "vgetQ_lane", notype_2, [S64; U64];

    (* Set lanes in a vector.  *)
    Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]];
                Instruction_name ["vmov"]],
      Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane",
      set_lane, pf_su_8_32;
    Vset_lane, [No_op;
                Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]];
                Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
      Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane",
      set_lane_notype, [S64; U64];
    Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]];
                Instruction_name ["vmov"]],
      Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane",
      set_lane, pf_su_8_32;
    Vset_lane, [Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]];
                Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
      Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane",
      set_lane_notype, [S64; U64];

    (* Create vector from literal bit pattern.  *)
    Vcreate,
      [No_op], (* Not really, but it can yield various things that are too
                  hard for the test generator at this time.  *)
      Use_operands [| Dreg; Corereg |], "vcreate", create_vector,
      pf_su_8_64;

    (* Set all lanes to the same value.  *)
    Vdup_n,
      [Disassembles_as [Use_operands [| Dreg;
                                        Alternatives [ Corereg;
                                                       Element_of_dreg ] |]]],
      Use_operands [| Dreg; Corereg |], "vdup_n", bits_1,
      pf_su_8_32;
    Vdup_n,
      [No_op;
       Instruction_name ["vmov"];
       Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]],
      Use_operands [| Dreg; Corereg |], "vdup_n", notype_1,
      [S64; U64];
    Vdup_n,
      [Disassembles_as [Use_operands [| Qreg;
                                        Alternatives [ Corereg;
                                                       Element_of_dreg ] |]]],
      Use_operands [| Qreg; Corereg |], "vdupQ_n", bits_1,
      pf_su_8_32;
    Vdup_n,
      [No_op;
       Instruction_name ["vmov"];
       Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |];
                        Use_operands [| Dreg; Corereg; Corereg |]]],
      Use_operands [| Qreg; Corereg |], "vdupQ_n", notype_1,
      [S64; U64];

    (* These are just aliases for the above.  *)
    Vmov_n,
      [Builtin_name "vdup_n";
       Disassembles_as [Use_operands [| Dreg;
                                        Alternatives [ Corereg;
                                                       Element_of_dreg ] |]]],
      Use_operands [| Dreg; Corereg |],
      "vmov_n", bits_1, pf_su_8_32;
    Vmov_n,
      [No_op;
       Builtin_name "vdup_n";
       Instruction_name ["vmov"];
       Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]],
      Use_operands [| Dreg; Corereg |],
      "vmov_n", notype_1, [S64; U64];
    Vmov_n,
      [Builtin_name "vdupQ_n";
       Disassembles_as [Use_operands [| Qreg;
                                        Alternatives [ Corereg;
                                                       Element_of_dreg ] |]]],
      Use_operands [| Qreg; Corereg |],
      "vmovQ_n", bits_1, pf_su_8_32;
    Vmov_n,
      [No_op;
       Builtin_name "vdupQ_n";
       Instruction_name ["vmov"];
       Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |];
                        Use_operands [| Dreg; Corereg; Corereg |]]],
      Use_operands [| Qreg; Corereg |],
      "vmovQ_n", notype_1, [S64; U64];

    (* Duplicate, lane version.  We can't use Use_operands here because the
       rightmost register (always Dreg) would be picked up by find_key_operand,
       when we want the leftmost register to be used in this case (otherwise
       the modes are indistinguishable in neon.md, etc.).  *)
    Vdup_lane,
      [Disassembles_as [Use_operands [| Dreg; Element_of_dreg |]]],
      Unary_scalar Dreg, "vdup_lane", bits_2, pf_su_8_32;
    Vdup_lane,
      [No_op; Const_valuator (fun _ -> 0)],
      Unary_scalar Dreg, "vdup_lane", bits_2, [S64; U64];
    Vdup_lane,
      [Disassembles_as [Use_operands [| Qreg; Element_of_dreg |]]],
      Unary_scalar Qreg, "vdupQ_lane", bits_2, pf_su_8_32;
    Vdup_lane,
      [No_op; Const_valuator (fun _ -> 0)],
      Unary_scalar Qreg, "vdupQ_lane", bits_2, [S64; U64];

    (* Combining vectors.  *)
    Vcombine, [No_op],
      Use_operands [| Qreg; Dreg; Dreg |], "vcombine", notype_2,
      pf_su_8_64;

    (* Splitting vectors.  *)
    Vget_high, [No_op],
      Use_operands [| Dreg; Qreg |], "vget_high",
      notype_1, pf_su_8_64;
    Vget_low, [Instruction_name ["vmov"];
               Disassembles_as [Use_operands [| Dreg; Dreg |]];
               Fixed_return_reg],
      Use_operands [| Dreg; Qreg |], "vget_low",
      notype_1, pf_su_8_32;
    Vget_low, [No_op],
      Use_operands [| Dreg; Qreg |], "vget_low",
      notype_1, [S64; U64];

    (* Conversions.  *)
    Vcvt, [InfoWord], All (2, Dreg), "vcvt", conv_1,
      [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
    Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1,
      [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
    Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2,
      [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
    Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2,
      [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];

    (* Move, narrowing.  *)
    Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]],
      Narrow, "vmovn", sign_invar_1, su_16_64;
    Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating],
      Narrow, "vqmovn", elts_same_1, su_16_64;
    Vmovn,
      [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating; Dst_unsign],
      Narrow, "vqmovun", dst_unsign_1,
      [S16; S32; S64];

    (* Move, long.  *)
    Vmovl, [Disassembles_as [Use_operands [| Qreg; Dreg |]]],
      Long, "vmovl", elts_same_1, su_8_32;

    (* Table lookup.  *)
    Vtbl 1,
      [Instruction_name ["vtbl"];
       Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]],
      Use_operands [| Dreg; Dreg; Dreg |], "vtbl1", table_2, [U8; S8; P8];
    Vtbl 2, [Instruction_name ["vtbl"]],
      Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbl2", table_2,
      [U8; S8; P8];
    Vtbl 3, [Instruction_name ["vtbl"]],
      Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbl3", table_2,
      [U8; S8; P8];
    Vtbl 4, [Instruction_name ["vtbl"]],
      Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbl4", table_2,
      [U8; S8; P8];

    (* Extended table lookup.  *)
    Vtbx 1,
      [Instruction_name ["vtbx"];
       Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]],
      Use_operands [| Dreg; Dreg; Dreg |], "vtbx1", table_io, [U8; S8; P8];
    Vtbx 2, [Instruction_name ["vtbx"]],
      Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbx2", table_io,
      [U8; S8; P8];
    Vtbx 3, [Instruction_name ["vtbx"]],
      Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbx3", table_io,
      [U8; S8; P8];
    Vtbx 4, [Instruction_name ["vtbx"]],
      Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbx4", table_io,
      [U8; S8; P8];

    (* Multiply, lane.  (note: these were undocumented at the time of
       writing).  *)
    Vmul_lane, [], By_scalar Dreg, "vmul_lane", sign_invar_2_lane,
      [S16; S32; U16; U32; F32];
    Vmul_lane, [], By_scalar Qreg, "vmulQ_lane", sign_invar_2_lane,
      [S16; S32; U16; U32; F32];

    (* Multiply-accumulate, lane.  *)
    Vmla_lane, [], By_scalar Dreg, "vmla_lane", sign_invar_io_lane,
      [S16; S32; U16; U32; F32];
    Vmla_lane, [], By_scalar Qreg, "vmlaQ_lane", sign_invar_io_lane,
      [S16; S32; U16; U32; F32];
    Vmla_lane, [], Wide_lane, "vmlal_lane", elts_same_io_lane,
      [S16; S32; U16; U32];
    Vmla_lane, [Saturating; Doubling], Wide_lane, "vqdmlal_lane",
      elts_same_io_lane, [S16; S32];

    (* Multiply-subtract, lane.  *)
    Vmls_lane, [], By_scalar Dreg, "vmls_lane", sign_invar_io_lane,
      [S16; S32; U16; U32; F32];
    Vmls_lane, [], By_scalar Qreg, "vmlsQ_lane", sign_invar_io_lane,
      [S16; S32; U16; U32; F32];
    Vmls_lane, [], Wide_lane, "vmlsl_lane", elts_same_io_lane,
      [S16; S32; U16; U32];
    Vmls_lane, [Saturating; Doubling], Wide_lane, "vqdmlsl_lane",
      elts_same_io_lane, [S16; S32];

    (* Long multiply, lane.  *)
    Vmull_lane, [],
      Wide_lane, "vmull_lane", elts_same_2_lane, [S16; S32; U16; U32];

    (* Saturating doubling long multiply, lane.  *)
    Vqdmull_lane, [Saturating; Doubling],
      Wide_lane, "vqdmull_lane", elts_same_2_lane, [S16; S32];

    (* Saturating doubling long multiply high, lane.  *)
    Vqdmulh_lane, [Saturating; Halving],
      By_scalar Qreg, "vqdmulhQ_lane", elts_same_2_lane, [S16; S32];
    Vqdmulh_lane, [Saturating; Halving],
      By_scalar Dreg, "vqdmulh_lane", elts_same_2_lane, [S16; S32];
    Vqdmulh_lane, [Saturating; Halving; Rounding;
                   Instruction_name ["vqrdmulh"]],
      By_scalar Qreg, "vqRdmulhQ_lane", elts_same_2_lane, [S16; S32];
    Vqdmulh_lane, [Saturating; Halving; Rounding;
                   Instruction_name ["vqrdmulh"]],
      By_scalar Dreg, "vqRdmulh_lane", elts_same_2_lane, [S16; S32];

    (* Vector multiply by scalar.  *)
1227
    Vmul_n, [InfoWord;
1228
             Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1229
             Use_operands [| Dreg; Dreg; Corereg |], "vmul_n",
1230
      sign_invar_2, [S16; S32; U16; U32; F32];
1231
    Vmul_n, [InfoWord;
1232
             Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1233
             Use_operands [| Qreg; Qreg; Corereg |], "vmulQ_n",
1234
      sign_invar_2, [S16; S32; U16; U32; F32];
1235
 
1236
    (* Vector long multiply by scalar.  *)
1237
    Vmull_n, [Instruction_name ["vmull"];
1238
              Disassembles_as [Use_operands [| Qreg; Dreg; Element_of_dreg |]]],
1239
              Wide_scalar, "vmull_n",
1240
      elts_same_2, [S16; S32; U16; U32];
1241
 
1242
    (* Vector saturating doubling long multiply by scalar.  *)
1243
    Vqdmull_n, [Saturating; Doubling;
1244
                Disassembles_as [Use_operands [| Qreg; Dreg;
1245
                                                 Element_of_dreg |]]],
1246
                Wide_scalar, "vqdmull_n",
1247
      elts_same_2, [S16; S32];
1248
 
1249
    (* Vector saturating doubling long multiply high by scalar.  *)
1250
    Vqdmulh_n,
1251
      [Saturating; Halving; InfoWord;
1252
       Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1253
      Use_operands [| Qreg; Qreg; Corereg |],
1254
      "vqdmulhQ_n", elts_same_2, [S16; S32];
1255
    Vqdmulh_n,
1256
      [Saturating; Halving; InfoWord;
1257
       Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1258
      Use_operands [| Dreg; Dreg; Corereg |],
1259
      "vqdmulh_n", elts_same_2, [S16; S32];
1260
    Vqdmulh_n,
1261
      [Saturating; Halving; Rounding; InfoWord;
1262
       Instruction_name ["vqrdmulh"];
1263
       Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1264
      Use_operands [| Qreg; Qreg; Corereg |],
1265
      "vqRdmulhQ_n", elts_same_2, [S16; S32];
1266
    Vqdmulh_n,
1267
      [Saturating; Halving; Rounding; InfoWord;
1268
       Instruction_name ["vqrdmulh"];
1269
       Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1270
      Use_operands [| Dreg; Dreg; Corereg |],
1271
      "vqRdmulh_n", elts_same_2, [S16; S32];
1272
 
1273
    (* Vector multiply-accumulate by scalar.  *)
1274
    Vmla_n, [InfoWord;
1275
             Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1276
      Use_operands [| Dreg; Dreg; Corereg |], "vmla_n",
1277
      sign_invar_io, [S16; S32; U16; U32; F32];
1278
    Vmla_n, [InfoWord;
1279
             Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1280
      Use_operands [| Qreg; Qreg; Corereg |], "vmlaQ_n",
1281
      sign_invar_io, [S16; S32; U16; U32; F32];
1282
    Vmla_n, [], Wide_scalar, "vmlal_n", elts_same_io, [S16; S32; U16; U32];
1283
    Vmla_n, [Saturating; Doubling], Wide_scalar, "vqdmlal_n", elts_same_io,
1284
      [S16; S32];
1285
 
1286
    (* Vector multiply subtract by scalar.  *)
1287
    Vmls_n, [InfoWord;
1288
             Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1289
      Use_operands [| Dreg; Dreg; Corereg |], "vmls_n",
1290
      sign_invar_io, [S16; S32; U16; U32; F32];
1291
    Vmls_n, [InfoWord;
1292
             Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1293
      Use_operands [| Qreg; Qreg; Corereg |], "vmlsQ_n",
1294
      sign_invar_io, [S16; S32; U16; U32; F32];
1295
    Vmls_n, [], Wide_scalar, "vmlsl_n", elts_same_io, [S16; S32; U16; U32];
1296
    Vmls_n, [Saturating; Doubling], Wide_scalar, "vqdmlsl_n", elts_same_io,
1297
      [S16; S32];
1298
 
1299
    (* Vector extract.  *)
1300
    Vext, [Const_valuator (fun _ -> 0)],
1301
      Use_operands [| Dreg; Dreg; Dreg; Immed |], "vext", extend,
1302
      pf_su_8_64;
1303
    Vext, [Const_valuator (fun _ -> 0)],
1304
      Use_operands [| Qreg; Qreg; Qreg; Immed |], "vextQ", extend,
1305
      pf_su_8_64;
1306
 
1307
    (* Reverse elements.  *)
1308
    Vrev64, [], All (2, Dreg), "vrev64", bits_1, P8 :: P16 :: F32 :: su_8_32;
1309
    Vrev64, [], All (2, Qreg), "vrev64Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
1310
    Vrev32, [], All (2, Dreg), "vrev32", bits_1, [P8; P16; S8; U8; S16; U16];
1311
    Vrev32, [], All (2, Qreg), "vrev32Q", bits_1, [P8; P16; S8; U8; S16; U16];
1312
    Vrev16, [], All (2, Dreg), "vrev16", bits_1, [P8; S8; U8];
1313
    Vrev16, [], All (2, Qreg), "vrev16Q", bits_1, [P8; S8; U8];
1314
 
1315
    (* Bit selection.  *)
1316
    Vbsl,
1317
      [Instruction_name ["vbsl"; "vbit"; "vbif"];
1318
       Disassembles_as [Use_operands [| Dreg; Dreg; Dreg |]]],
1319
      Use_operands [| Dreg; Dreg; Dreg; Dreg |], "vbsl", bit_select,
1320
      pf_su_8_64;
1321
    Vbsl,
1322
      [Instruction_name ["vbsl"; "vbit"; "vbif"];
1323
       Disassembles_as [Use_operands [| Qreg; Qreg; Qreg |]]],
1324
      Use_operands [| Qreg; Qreg; Qreg; Qreg |], "vbslQ", bit_select,
1325
      pf_su_8_64;
1326
 
1327
    (* Transpose elements.  **NOTE** ReturnPtr goes some of the way towards
1328
       generating good code for intrinsics which return structure types --
1329
       builtins work well by themselves (and understand that the values being
1330
       stored on e.g. the stack also reside in registers, so can optimise the
1331
       stores away entirely if the results are used immediately), but
1332
       intrinsics are very much less efficient. Maybe something can be improved
1333
       re: inlining, or tweaking the ABI used for intrinsics (a special call
1334
       attribute?).
1335
    *)
1336
    Vtrn, [ReturnPtr], Pair_result Dreg, "vtrn", bits_2, pf_su_8_32;
    Vtrn, [ReturnPtr], Pair_result Qreg, "vtrnQ", bits_2, pf_su_8_32;

    (* Zip elements.  *)
    Vzip, [ReturnPtr], Pair_result Dreg, "vzip", bits_2, pf_su_8_32;
    Vzip, [ReturnPtr], Pair_result Qreg, "vzipQ", bits_2, pf_su_8_32;

    (* Unzip elements.  *)
    Vuzp, [ReturnPtr], Pair_result Dreg, "vuzp", bits_2, pf_su_8_32;
    Vuzp, [ReturnPtr], Pair_result Qreg, "vuzpQ", bits_2, pf_su_8_32;

    (* Element/structure loads.  VLD1 variants.  *)
    Vldx 1,
      [Disassembles_as [Use_operands [| VecArray (1, Dreg);
                                        CstPtrTo Corereg |]]],
      Use_operands [| Dreg; CstPtrTo Corereg |], "vld1", bits_1,
      pf_su_8_64;
    Vldx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
                                              CstPtrTo Corereg |]]],
      Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q", bits_1,
      pf_su_8_64;

    Vldx_lane 1,
      [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
                                        CstPtrTo Corereg |]]],
      Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |],
      "vld1_lane", bits_3, pf_su_8_32;
    Vldx_lane 1,
      [Disassembles_as [Use_operands [| VecArray (1, Dreg);
                                        CstPtrTo Corereg |]];
       Const_valuator (fun _ -> 0)],
      Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |],
      "vld1_lane", bits_3, [S64; U64];
    Vldx_lane 1,
      [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
                                        CstPtrTo Corereg |]]],
      Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |],
      "vld1Q_lane", bits_3, pf_su_8_32;
    Vldx_lane 1,
      [Disassembles_as [Use_operands [| VecArray (1, Dreg);
                                        CstPtrTo Corereg |]]],
      Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |],
      "vld1Q_lane", bits_3, [S64; U64];

    Vldx_dup 1,
      [Disassembles_as [Use_operands [| VecArray (1, All_elements_of_dreg);
                                        CstPtrTo Corereg |]]],
      Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup",
      bits_1, pf_su_8_32;
    Vldx_dup 1,
      [Disassembles_as [Use_operands [| VecArray (1, Dreg);
                                        CstPtrTo Corereg |]]],
      Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup",
      bits_1, [S64; U64];
    Vldx_dup 1,
      [Disassembles_as [Use_operands [| VecArray (2, All_elements_of_dreg);
                                        CstPtrTo Corereg |]]],
      Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup",
      bits_1, pf_su_8_32;
    Vldx_dup 1,
      [Disassembles_as [Use_operands [| VecArray (2, Dreg);
                                        CstPtrTo Corereg |]]],
      Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup",
      bits_1, [S64; U64];

    (* VST1 variants.  *)
    Vstx 1, [Disassembles_as [Use_operands [| VecArray (1, Dreg);
                                              PtrTo Corereg |]]],
      Use_operands [| PtrTo Corereg; Dreg |], "vst1",
      store_1, pf_su_8_64;
    Vstx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
                                              PtrTo Corereg |]]],
      Use_operands [| PtrTo Corereg; Qreg |], "vst1Q",
      store_1, pf_su_8_64;

    Vstx_lane 1,
      [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
                                        CstPtrTo Corereg |]]],
      Use_operands [| PtrTo Corereg; Dreg; Immed |],
      "vst1_lane", store_3, pf_su_8_32;
    Vstx_lane 1,
      [Disassembles_as [Use_operands [| VecArray (1, Dreg);
                                        CstPtrTo Corereg |]];
       Const_valuator (fun _ -> 0)],
      Use_operands [| PtrTo Corereg; Dreg; Immed |],
      "vst1_lane", store_3, [U64; S64];
    Vstx_lane 1,
      [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
                                        CstPtrTo Corereg |]]],
      Use_operands [| PtrTo Corereg; Qreg; Immed |],
      "vst1Q_lane", store_3, pf_su_8_32;
    Vstx_lane 1,
      [Disassembles_as [Use_operands [| VecArray (1, Dreg);
                                        CstPtrTo Corereg |]]],
      Use_operands [| PtrTo Corereg; Qreg; Immed |],
      "vst1Q_lane", store_3, [U64; S64];

    (* VLD2 variants.  *)
    Vldx 2, [], Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
      "vld2", bits_1, pf_su_8_32;
    Vldx 2, [Instruction_name ["vld1"]],
      Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
      "vld2", bits_1, [S64; U64];
    Vldx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
                                              CstPtrTo Corereg |];
                              Use_operands [| VecArray (2, Dreg);
                                              CstPtrTo Corereg |]]],
      Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg |],
      "vld2Q", bits_1, pf_su_8_32;

    Vldx_lane 2,
      [Disassembles_as [Use_operands
        [| VecArray (2, Element_of_dreg);
           CstPtrTo Corereg |]]],
      Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg;
                      VecArray (2, Dreg); Immed |],
      "vld2_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
    Vldx_lane 2,
      [Disassembles_as [Use_operands
        [| VecArray (2, Element_of_dreg);
           CstPtrTo Corereg |]]],
      Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg;
                      VecArray (2, Qreg); Immed |],
      "vld2Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];

    Vldx_dup 2,
      [Disassembles_as [Use_operands
        [| VecArray (2, All_elements_of_dreg); CstPtrTo Corereg |]]],
      Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
      "vld2_dup", bits_1, pf_su_8_32;
    Vldx_dup 2,
      [Instruction_name ["vld1"]; Disassembles_as [Use_operands
        [| VecArray (2, Dreg); CstPtrTo Corereg |]]],
      Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
      "vld2_dup", bits_1, [S64; U64];

    (* VST2 variants.  *)
    Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
                                              PtrTo Corereg |]]],
      Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2",
      store_1, pf_su_8_32;
    Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
                                              PtrTo Corereg |]];
             Instruction_name ["vst1"]],
      Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2",
      store_1, [S64; U64];
    Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
                                              PtrTo Corereg |];
                              Use_operands [| VecArray (2, Dreg);
                                              PtrTo Corereg |]]],
      Use_operands [| PtrTo Corereg; VecArray (2, Qreg) |], "vst2Q",
      store_1, pf_su_8_32;

    Vstx_lane 2,
      [Disassembles_as [Use_operands
        [| VecArray (2, Element_of_dreg);
           CstPtrTo Corereg |]]],
      Use_operands [| PtrTo Corereg; VecArray (2, Dreg); Immed |], "vst2_lane",
      store_3, P8 :: P16 :: F32 :: su_8_32;
    Vstx_lane 2,
      [Disassembles_as [Use_operands
        [| VecArray (2, Element_of_dreg);
           CstPtrTo Corereg |]]],
      Use_operands [| PtrTo Corereg; VecArray (2, Qreg); Immed |], "vst2Q_lane",
      store_3, [P16; F32; U16; U32; S16; S32];

    (* VLD3 variants.  *)
    Vldx 3, [], Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
      "vld3", bits_1, pf_su_8_32;
    Vldx 3, [Instruction_name ["vld1"]],
      Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
      "vld3", bits_1, [S64; U64];
    Vldx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg);
                                              CstPtrTo Corereg |];
                              Use_operands [| VecArray (3, Dreg);
                                              CstPtrTo Corereg |]]],
      Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg |],
      "vld3Q", bits_1, P8 :: P16 :: F32 :: su_8_32;

    Vldx_lane 3,
      [Disassembles_as [Use_operands
        [| VecArray (3, Element_of_dreg);
           CstPtrTo Corereg |]]],
      Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg;
                      VecArray (3, Dreg); Immed |],
      "vld3_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
    Vldx_lane 3,
      [Disassembles_as [Use_operands
        [| VecArray (3, Element_of_dreg);
           CstPtrTo Corereg |]]],
      Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg;
                      VecArray (3, Qreg); Immed |],
      "vld3Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];

    Vldx_dup 3,
      [Disassembles_as [Use_operands
        [| VecArray (3, All_elements_of_dreg); CstPtrTo Corereg |]]],
      Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
      "vld3_dup", bits_1, pf_su_8_32;
    Vldx_dup 3,
      [Instruction_name ["vld1"]; Disassembles_as [Use_operands
        [| VecArray (3, Dreg); CstPtrTo Corereg |]]],
      Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
      "vld3_dup", bits_1, [S64; U64];

    (* VST3 variants.  *)
    Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
                                              PtrTo Corereg |]]],
      Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3",
      store_1, pf_su_8_32;
    Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
                                              PtrTo Corereg |]];
             Instruction_name ["vst1"]],
      Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3",
      store_1, [S64; U64];
    Vstx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg);
                                              PtrTo Corereg |];
                              Use_operands [| VecArray (3, Dreg);
                                              PtrTo Corereg |]]],
      Use_operands [| PtrTo Corereg; VecArray (3, Qreg) |], "vst3Q",
      store_1, pf_su_8_32;

    Vstx_lane 3,
      [Disassembles_as [Use_operands
        [| VecArray (3, Element_of_dreg);
           CstPtrTo Corereg |]]],
      Use_operands [| PtrTo Corereg; VecArray (3, Dreg); Immed |], "vst3_lane",
      store_3, P8 :: P16 :: F32 :: su_8_32;
    Vstx_lane 3,
      [Disassembles_as [Use_operands
        [| VecArray (3, Element_of_dreg);
           CstPtrTo Corereg |]]],
      Use_operands [| PtrTo Corereg; VecArray (3, Qreg); Immed |], "vst3Q_lane",
      store_3, [P16; F32; U16; U32; S16; S32];

    (* VLD4/VST4 variants.  *)
    Vldx 4, [], Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
      "vld4", bits_1, pf_su_8_32;
    Vldx 4, [Instruction_name ["vld1"]],
      Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
      "vld4", bits_1, [S64; U64];
    Vldx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
                                              CstPtrTo Corereg |];
                              Use_operands [| VecArray (4, Dreg);
                                              CstPtrTo Corereg |]]],
      Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg |],
      "vld4Q", bits_1, P8 :: P16 :: F32 :: su_8_32;

    Vldx_lane 4,
      [Disassembles_as [Use_operands
        [| VecArray (4, Element_of_dreg);
           CstPtrTo Corereg |]]],
      Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg;
                      VecArray (4, Dreg); Immed |],
      "vld4_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
    Vldx_lane 4,
      [Disassembles_as [Use_operands
        [| VecArray (4, Element_of_dreg);
           CstPtrTo Corereg |]]],
      Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg;
                      VecArray (4, Qreg); Immed |],
      "vld4Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];

    Vldx_dup 4,
      [Disassembles_as [Use_operands
        [| VecArray (4, All_elements_of_dreg); CstPtrTo Corereg |]]],
      Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
      "vld4_dup", bits_1, pf_su_8_32;
    Vldx_dup 4,
      [Instruction_name ["vld1"]; Disassembles_as [Use_operands
        [| VecArray (4, Dreg); CstPtrTo Corereg |]]],
      Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
      "vld4_dup", bits_1, [S64; U64];

    Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
                                              PtrTo Corereg |]]],
      Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4",
      store_1, pf_su_8_32;
    Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
                                              PtrTo Corereg |]];
             Instruction_name ["vst1"]],
      Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4",
      store_1, [S64; U64];
    Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
                                              PtrTo Corereg |];
                              Use_operands [| VecArray (4, Dreg);
                                              PtrTo Corereg |]]],
      Use_operands [| PtrTo Corereg; VecArray (4, Qreg) |], "vst4Q",
      store_1, pf_su_8_32;

    Vstx_lane 4,
      [Disassembles_as [Use_operands
        [| VecArray (4, Element_of_dreg);
           CstPtrTo Corereg |]]],
      Use_operands [| PtrTo Corereg; VecArray (4, Dreg); Immed |], "vst4_lane",
      store_3, P8 :: P16 :: F32 :: su_8_32;
    Vstx_lane 4,
      [Disassembles_as [Use_operands
        [| VecArray (4, Element_of_dreg);
           CstPtrTo Corereg |]]],
      Use_operands [| PtrTo Corereg; VecArray (4, Qreg); Immed |], "vst4Q_lane",
      store_3, [P16; F32; U16; U32; S16; S32];

    (* Logical operations. And.  *)
    Vand, [], All (3, Dreg), "vand", notype_2, su_8_32;
    Vand, [No_op], All (3, Dreg), "vand", notype_2, [S64; U64];
    Vand, [], All (3, Qreg), "vandQ", notype_2, su_8_64;

    (* Or.  *)
    Vorr, [], All (3, Dreg), "vorr", notype_2, su_8_32;
    Vorr, [No_op], All (3, Dreg), "vorr", notype_2, [S64; U64];
    Vorr, [], All (3, Qreg), "vorrQ", notype_2, su_8_64;

    (* Eor.  *)
    Veor, [], All (3, Dreg), "veor", notype_2, su_8_32;
    Veor, [No_op], All (3, Dreg), "veor", notype_2, [S64; U64];
    Veor, [], All (3, Qreg), "veorQ", notype_2, su_8_64;

    (* Bic (And-not).  *)
    Vbic, [], All (3, Dreg), "vbic", notype_2, su_8_32;
    Vbic, [No_op], All (3, Dreg), "vbic", notype_2, [S64; U64];
    Vbic, [], All (3, Qreg), "vbicQ", notype_2, su_8_64;

    (* Or-not.  *)
    Vorn, [], All (3, Dreg), "vorn", notype_2, su_8_32;
    Vorn, [No_op], All (3, Dreg), "vorn", notype_2, [S64; U64];
    Vorn, [], All (3, Qreg), "vornQ", notype_2, su_8_64;
  ]
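
(* Illustrative note, not part of the original source: each entry above is a
   six-element tuple -- the opcode constructor, its feature annotations, the
   operand shape, the intrinsic's base name, what is presumably the
   element-type/arity mapping used when building signatures, and the element
   types to instantiate.  For example, the 64-bit D-register AND variant reads
   as follows (the binding name here is hypothetical): *)
let _example_ops_entry =
  Vand, [No_op], All (3, Dreg), "vand", notype_2, [S64; U64]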

let reinterp =
  let elems = P8 :: P16 :: F32 :: su_8_64 in
  List.fold_right
    (fun convto acc ->
      let types = List.fold_right
        (fun convfrom acc ->
          if convfrom <> convto then
            Cast (convto, convfrom) :: acc
          else
            acc)
        elems
        []
      in
        let dconv = Vreinterp, [No_op], Use_operands [| Dreg; Dreg |],
                      "vreinterpret", conv_1, types
        and qconv = Vreinterp, [No_op], Use_operands [| Qreg; Qreg |],
                      "vreinterpretQ", conv_1, types in
        dconv :: qconv :: acc)
    elems
    []
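
(* Illustrative sketch, not part of the original source: the nested folds
   above pair every element type with each of the others, so the cast list
   attached to a given target type omits only the identity cast.  Assuming
   the same tuple layout as the ops entries, the D-register reinterpret
   variants can be picked out like this (the binding name is hypothetical): *)
let _example_d_reinterps =
  List.filter
    (fun (_, _, shape, _, _, _) -> shape = Use_operands [| Dreg; Dreg |])
    reinterp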

(* Output routines.  *)

let rec string_of_elt = function
    S8 -> "s8" | S16 -> "s16" | S32 -> "s32" | S64 -> "s64"
  | U8 -> "u8" | U16 -> "u16" | U32 -> "u32" | U64 -> "u64"
  | I8 -> "i8" | I16 -> "i16" | I32 -> "i32" | I64 -> "i64"
  | B8 -> "8" | B16 -> "16" | B32 -> "32" | B64 -> "64"
  | F32 -> "f32" | P8 -> "p8" | P16 -> "p16"
  | Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "_" ^ string_of_elt b
  | NoElts -> failwith "No elts"

let string_of_elt_dots elt =
  match elt with
    Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "." ^ string_of_elt b
  | _ -> string_of_elt elt
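
(* Illustrative examples, not part of the original source: simple element
   types map to their suffix strings, while conversions are joined with an
   underscore by string_of_elt and with a dot by string_of_elt_dots. *)
let _example_elt_name = string_of_elt (Conv (S16, S8))       (* "s16_s8" *)
let _example_elt_dots = string_of_elt_dots (Conv (S16, S8))  (* "s16.s8" *)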

let string_of_vectype vt =
  let rec name affix = function
    T_int8x8 -> affix "int8x8"
  | T_int8x16 -> affix "int8x16"
  | T_int16x4 -> affix "int16x4"
  | T_int16x8 -> affix "int16x8"
  | T_int32x2 -> affix "int32x2"
  | T_int32x4 -> affix "int32x4"
  | T_int64x1 -> affix "int64x1"
  | T_int64x2 -> affix "int64x2"
  | T_uint8x8 -> affix "uint8x8"
  | T_uint8x16 -> affix "uint8x16"
  | T_uint16x4 -> affix "uint16x4"
  | T_uint16x8 -> affix "uint16x8"
  | T_uint32x2 -> affix "uint32x2"
  | T_uint32x4 -> affix "uint32x4"
  | T_uint64x1 -> affix "uint64x1"
  | T_uint64x2 -> affix "uint64x2"
  | T_float32x2 -> affix "float32x2"
  | T_float32x4 -> affix "float32x4"
  | T_poly8x8 -> affix "poly8x8"
  | T_poly8x16 -> affix "poly8x16"
  | T_poly16x4 -> affix "poly16x4"
  | T_poly16x8 -> affix "poly16x8"
  | T_int8 -> affix "int8"
  | T_int16 -> affix "int16"
  | T_int32 -> affix "int32"
  | T_int64 -> affix "int64"
  | T_uint8 -> affix "uint8"
  | T_uint16 -> affix "uint16"
  | T_uint32 -> affix "uint32"
  | T_uint64 -> affix "uint64"
  | T_poly8 -> affix "poly8"
  | T_poly16 -> affix "poly16"
  | T_float32 -> affix "float32"
  | T_immediate _ -> "const int"
  | T_void -> "void"
  | T_intQI -> "__builtin_neon_qi"
  | T_intHI -> "__builtin_neon_hi"
  | T_intSI -> "__builtin_neon_si"
  | T_intDI -> "__builtin_neon_di"
  | T_floatSF -> "__builtin_neon_sf"
  | T_arrayof (num, base) ->
      let basename = name (fun x -> x) base in
      affix (Printf.sprintf "%sx%d" basename num)
  | T_ptrto x ->
      let basename = name affix x in
      Printf.sprintf "%s *" basename
  | T_const x ->
      let basename = name affix x in
      Printf.sprintf "const %s" basename
  in
    name (fun x -> x ^ "_t") vt
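
(* Illustrative examples, not part of the original source: plain vector types
   gain a "_t" suffix, array-of-vector types fold the element count into the
   name before the suffix, and pointer/const wrappers are printed around the
   base type. *)
let _example_array_type = string_of_vectype (T_arrayof (2, T_int32x4))
  (* "int32x4x2_t" *)
let _example_ptr_type = string_of_vectype (T_ptrto (T_const T_uint8x8))
  (* "const uint8x8_t *" *)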

let string_of_inttype = function
    B_TImode -> "__builtin_neon_ti"
  | B_EImode -> "__builtin_neon_ei"
  | B_OImode -> "__builtin_neon_oi"
  | B_CImode -> "__builtin_neon_ci"
  | B_XImode -> "__builtin_neon_xi"

let string_of_mode = function
    V8QI -> "v8qi" | V4HI  -> "v4hi"  | V2SI -> "v2si" | V2SF -> "v2sf"
  | DI   -> "di"   | V16QI -> "v16qi" | V8HI -> "v8hi" | V4SI -> "v4si"
  | V4SF -> "v4sf" | V2DI  -> "v2di"  | QI -> "qi" | HI -> "hi" | SI -> "si"
  | SF -> "sf"

(* Use uppercase chars for letters which form part of the intrinsic name, but
   should be omitted from the builtin name (the info is passed in an extra
   argument, instead).  *)
let intrinsic_name name = String.lowercase name

(* Allow the name of the builtin to be overridden by things (e.g. Flipped)
   found in the features list.  *)
let builtin_name features name =
  let name = List.fold_right
               (fun el name ->
                 match el with
                   Flipped x | Builtin_name x -> x
                 | _ -> name)
               features name in
  let islower x = let str = String.make 1 x in (String.lowercase str) = str
  and buf = Buffer.create (String.length name) in
  String.iter (fun c -> if islower c then Buffer.add_char buf c) name;
  Buffer.contents buf
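
(* Illustrative examples, not part of the original source: the public
   intrinsic name keeps the "Q" marker (merely lowercased), whereas the
   builtin name drops every uppercase character, so the D- and Q-register
   variants of an operation share a single builtin. *)
let _example_intrinsic = intrinsic_name "vmlsQ_n"    (* "vmlsq_n" *)
let _example_builtin   = builtin_name [] "vmlsQ_n"   (* "vmls_n" *)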

(* Transform an arity into a list of strings.  *)
let strings_of_arity a =
  match a with
  | Arity0 vt -> [string_of_vectype vt]
  | Arity1 (vt1, vt2) -> [string_of_vectype vt1; string_of_vectype vt2]
  | Arity2 (vt1, vt2, vt3) -> [string_of_vectype vt1;
                               string_of_vectype vt2;
                               string_of_vectype vt3]
  | Arity3 (vt1, vt2, vt3, vt4) -> [string_of_vectype vt1;
                                    string_of_vectype vt2;
                                    string_of_vectype vt3;
                                    string_of_vectype vt4]
  | Arity4 (vt1, vt2, vt3, vt4, vt5) -> [string_of_vectype vt1;
                                         string_of_vectype vt2;
                                         string_of_vectype vt3;
                                         string_of_vectype vt4;
                                         string_of_vectype vt5]
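
(* Illustrative example, not part of the original source: an arity simply
   becomes the list of its C type names, in order. *)
let _example_arity_strings = strings_of_arity (Arity1 (T_int8x8, T_uint8x8))
  (* ["int8x8_t"; "uint8x8_t"] *)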

(* Suffixes on the end of builtin names that are to be stripped in order
   to obtain the name used as an instruction.  They are only stripped if
   preceded immediately by an underscore.  *)
let suffixes_to_strip = [ "n"; "lane"; "dup" ]

(* Get the possible names of an instruction corresponding to a "name" from the
   ops table.  This is done by taking the equivalent builtin name and
   stripping any suffix from the suffixes_to_strip list above, unless the
   features list contains an Instruction_name entry, in which case that is
   used, or a Flipped entry, in which case that is used.  If both such
   entries are present, the first in the list is chosen.  *)
let get_insn_names features name =
  let names = try
  begin
    match List.find (fun feature -> match feature with
                                      Instruction_name _ -> true
                                    | Flipped _ -> true
                                    | _ -> false) features
    with
      Instruction_name names -> names
    | Flipped name -> [name]
    | _ -> assert false
  end
  with Not_found -> [builtin_name features name]
  in
  begin
    List.map (fun name' ->
      try
        let underscore = String.rindex name' '_' in
        let our_suffix = String.sub name' (underscore + 1)
                                    ((String.length name') - underscore - 1)
        in
          let rec strip remaining_suffixes =
            match remaining_suffixes with
              [] -> name'
            | s::ss when our_suffix = s -> String.sub name' 0 underscore
            | _::ss -> strip ss
          in
            strip suffixes_to_strip
      with (Not_found | Invalid_argument _) -> name') names
  end
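
(* Illustrative examples, not part of the original source: with no relevant
   feature, the builtin name is used with any "_n", "_lane" or "_dup" suffix
   stripped; an Instruction_name entry overrides this wholesale. *)
let _example_insn_default = get_insn_names [] "vld1_dup"
  (* ["vld1"] *)
let _example_insn_override =
  get_insn_names [Instruction_name ["vbsl"; "vbit"; "vbif"]] "vbsl"
  (* ["vbsl"; "vbit"; "vbif"] *)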

(* Apply a function to each element of a list and then comma-separate
   the resulting strings.  *)
let rec commas f elts acc =
  match elts with
    [] -> acc
  | [elt] -> acc ^ (f elt)
  | elt::elts ->
    commas f elts (acc ^ (f elt) ^ ", ")
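
(* Illustrative example, not part of the original source: the accumulator
   argument acts as a prefix for the comma-separated result. *)
let _example_commas = commas string_of_elt [S8; S16; S32] ""
  (* "s8, s16, s32" *)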

(* Given a list of features and the shape specified in the "ops" table, apply
   a function to each possible shape that the instruction may have.
   By default, this is the "shape" entry in "ops".  If the features list
   contains a Disassembles_as entry, the shapes contained in that entry are
   mapped to corresponding outputs and returned in a list.  If there is more
   than one Disassembles_as entry, only the first is used.  *)
let analyze_all_shapes features shape f =
  try
    match List.find (fun feature ->
                       match feature with Disassembles_as _ -> true
                                        | _ -> false)
                    features with
      Disassembles_as shapes -> List.map f shapes
    | _ -> assert false
  with Not_found -> [f shape]
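
(* Illustrative example, not part of the original source: with no
   Disassembles_as entry in the features list, the supplied shape is the
   only one analyzed. *)
let _example_shapes =
  analyze_all_shapes [] (Use_operands [| Dreg; Dreg |]) (fun shape -> shape)
  (* = [Use_operands [| Dreg; Dreg |]] *)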