OpenCores
URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [gcc-4.5.1/] [gcc/] [config/] [arm/] [neon.ml] - Blame information for rev 282

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 282 jeremybenn
(* Common code for ARM NEON header file, documentation and test case
2
   generators.
3
 
4
   Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
5
   Contributed by CodeSourcery.
6
 
7
   This file is part of GCC.
8
 
9
   GCC is free software; you can redistribute it and/or modify it under
10
   the terms of the GNU General Public License as published by the Free
11
   Software Foundation; either version 3, or (at your option) any later
12
   version.
13
 
14
   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
16
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17
   for more details.
18
 
19
   You should have received a copy of the GNU General Public License
20
   along with GCC; see the file COPYING3.  If not see
21
   <http://www.gnu.org/licenses/>.  *)
22
 
23
(* Shorthand types for vector elements.  *)
24
(* Element kinds.  Plain kinds are named by a class letter and a bit width;
   Conv and Cast each pair two element kinds; NoElts means no element type.  *)
type elts =
  | S8 | S16 | S32 | S64
  | F32
  | U8 | U16 | U32 | U64
  | P8 | P16
  | I8 | I16 | I32 | I64
  | B8 | B16 | B32 | B64
  | Conv of elts * elts
  | Cast of elts * elts
  | NoElts
27
 
28
(* Class of an element kind with the width dropped.  ConvClass pairs the
   classes of the two sides of a Conv/Cast element; NoType goes with NoElts.  *)
type eltclass =
  | Signed
  | Unsigned
  | Float
  | Poly
  | Int
  | Bits
  | ConvClass of eltclass * eltclass
  | NoType
30
 
31
(* These vector types correspond directly to C types.  *)
32
type vectype =
  (* Signed integer vectors.  *)
  | T_int8x8
  | T_int8x16
  | T_int16x4
  | T_int16x8
  | T_int32x2
  | T_int32x4
  | T_int64x1
  | T_int64x2
  (* Unsigned integer vectors.  *)
  | T_uint8x8
  | T_uint8x16
  | T_uint16x4
  | T_uint16x8
  | T_uint32x2
  | T_uint32x4
  | T_uint64x1
  | T_uint64x2
  (* Floating-point vectors.  *)
  | T_float32x2
  | T_float32x4
  (* Polynomial vectors.  *)
  | T_poly8x8
  | T_poly8x16
  | T_poly16x4
  | T_poly16x8
  (* Immediate operand carrying two integers (presumably the inclusive
     bounds of the permitted value -- confirm against the users).  *)
  | T_immediate of int * int
  (* Scalar types.  *)
  | T_int8
  | T_int16
  | T_int32
  | T_int64
  | T_uint8
  | T_uint16
  | T_uint32
  | T_uint64
  | T_poly8
  | T_poly16
  | T_float32
  (* Derived types: array of N elements, pointer, const-qualified.  *)
  | T_arrayof of int * vectype
  | T_ptrto of vectype
  | T_const of vectype
  | T_void
  (* Mode-named scalar types.  *)
  | T_intQI
  | T_intHI
  | T_intSI
  | T_intDI
  | T_floatSF
54
 
55
(* The meanings of the following are:
56
     TImode : "Tetra", two registers (four words).
57
     EImode : "hExa", three registers (six words).
58
     OImode : "Octa", four registers (eight words).
59
     CImode : "dodeCa", six registers (twelve words).
60
     XImode : "heXadeca", eight registers (sixteen words).
61
*)
62
 
63
(* Opaque integer modes, ordered by increasing size.  *)
type inttype =
  | B_TImode
  | B_EImode
  | B_OImode
  | B_CImode
  | B_XImode
64
 
65
(* The kind of a single operand of an instruction shape.  *)
type shape_elt =
  | Dreg
  | Qreg
  | Corereg
  | Immed
  | VecArray of int * shape_elt
  | PtrTo of shape_elt
  | CstPtrTo of shape_elt
  (* The remaining constructors are used only in the test generator.  *)
  | Element_of_dreg                 (* For "lane" variants.  *)
  | Element_of_qreg                 (* Likewise.  *)
  | All_elements_of_dreg            (* For "dup" variants.  *)
  | Alternatives of shape_elt list  (* For multiple valid operands.  *)
72
 
73
(* Overall operand layout of an instruction.  The register notes below
   restate what regmap returns for operands 0, 1, 2, ... of each form.  *)
type shape_form =
  | All of int * shape_elt          (* Every operand is the given kind.  *)
  | Long                            (* Q, D, D.  *)
  | Long_noreg of shape_elt         (* Every operand is the given kind.  *)
  | Wide                            (* Q, Q, D.  *)
  | Wide_noreg of shape_elt         (* Every operand is the given kind.  *)
  | Narrow                          (* D, Q, Q.  *)
  | Long_imm                        (* Q, D, immediate.  *)
  | Narrow_imm                      (* D, Q, immediate.  *)
  | Binary_imm of shape_elt         (* reg, reg, immediate.  *)
  | Use_operands of shape_elt array (* Operands listed explicitly.  *)
  | By_scalar of shape_elt          (* reg, reg, D, immediate.  *)
  | Unary_scalar of shape_elt       (* reg, D, immediate.  *)
  | Wide_lane                       (* Q, D, D, immediate.  *)
  | Wide_scalar                     (* Q, D, core register.  *)
  | Pair_result of shape_elt        (* Array of two regs, reg, reg.  *)
88
 
89
(* A prototype: ArityN carries N + 1 types.  As used in this file the first
   component is the result type and the rest are the operand types.  *)
type arity =
  | Arity0 of vectype
  | Arity1 of vectype * vectype
  | Arity2 of vectype * vectype * vectype
  | Arity3 of vectype * vectype * vectype * vectype
  | Arity4 of vectype * vectype * vectype * vectype * vectype
94
 
95
(* Machine modes, as selected by mode_of_elt.  *)
type vecmode =
  (* Modes chosen for D-register shapes.  *)
  | V8QI
  | V4HI
  | V2SI
  | V2SF
  | DI
  (* Modes chosen for Q-register shapes.  *)
  | V16QI
  | V8HI
  | V4SI
  | V4SF
  | V2DI
  (* Modes chosen for core-register and pointer shapes.  *)
  | QI
  | HI
  | SI
  | SF
98
 
99
type opcode =
  (* Binary ops.  *)
  | Vadd | Vmul | Vmla | Vmls | Vsub
  | Vceq | Vcge | Vcgt | Vcle | Vclt
  | Vcage | Vcagt | Vcale | Vcalt
  | Vtst
  | Vabd | Vaba
  | Vmax | Vmin
  | Vpadd | Vpada | Vpmax | Vpmin
  | Vrecps | Vrsqrts
  | Vshl | Vshr_n | Vshl_n | Vsra_n | Vsri | Vsli
  (* Logic binops.  *)
  | Vand | Vorr | Veor | Vbic | Vorn | Vbsl
  (* Ops with scalar.  *)
  | Vmul_lane | Vmla_lane | Vmls_lane
  | Vmul_n | Vmla_n | Vmls_n
  | Vmull_n | Vmull_lane
  | Vqdmull_n | Vqdmull_lane
  | Vqdmulh_n | Vqdmulh_lane
  (* Unary ops.  *)
  | Vabs | Vneg | Vcls | Vclz | Vcnt | Vrecpe | Vrsqrte | Vmvn
  (* Vector extract.  *)
  | Vext
  (* Reverse elements.  *)
  | Vrev64 | Vrev32 | Vrev16
  (* Transposition ops.  *)
  | Vtrn | Vzip | Vuzp
  (* Loads and stores (VLD1/VST1/VLD2...), elements and structures.  *)
  | Vldx of int
  | Vstx of int
  | Vldx_lane of int
  | Vldx_dup of int
  | Vstx_lane of int
  (* Set/extract lanes from a vector.  *)
  | Vget_lane | Vset_lane
  (* Initialize vector from bit pattern.  *)
  | Vcreate
  (* Set all lanes to same value.  *)
  | Vdup_n
  | Vmov_n  (* Is this the same?  *)
  (* Duplicate scalar to all lanes of vector.  *)
  | Vdup_lane
  (* Combine vectors.  *)
  | Vcombine
  (* Get quadword high/low parts.  *)
  | Vget_high | Vget_low
  (* Convert vectors.  *)
  | Vcvt | Vcvt_n
  (* Narrow/lengthen vectors.  *)
  | Vmovn | Vmovl
  (* Table lookup.  *)
  | Vtbl of int
  | Vtbx of int
  (* Reinterpret casts.  *)
  | Vreinterp
203
 
204
(* Features used for documentation, to distinguish between some instruction
205
   variants, and to signal special requirements (e.g. swapping arguments).  *)
206
 
207
type features =
  | Halving
  | Rounding
  | Saturating
  | Dst_unsign
  | High_half
  | Doubling
    (* Builtin name to use with flipped arguments.  *)
  | Flipped of string
    (* Pass an extra word for signage/rounding etc. (always passed for
       All _, Long, Wide, Narrow shape_forms).  *)
  | InfoWord
    (* Pass explicit pointer to return value as first argument.  *)
  | ReturnPtr
    (* The shape of instruction expected upon disassembly, given when it
       differs from the shape used to build the intrinsic prototype.  More
       than one entry means the intrinsic expands to several assembly
       instructions, each with its own shape listed here.  *)
  | Disassembles_as of shape_form list
    (* Override the name of the builtin.  *)
  | Builtin_name of string
    (* Override the name of the instruction.  Several names mean the
       instruction may appear under any of them.  *)
  | Instruction_name of string list
    (* The intrinsic yields no instructions, or expands to behavior that the
       test generator cannot test.  *)
  | No_op
    (* The intrinsic has constant arguments that cannot be set to the
       defaults (zero for pointers and one otherwise) in the test cases.
       The supplied function must return the integer to be written into the
       testcase for the (0-based) argument number passed to it.  *)
  | Const_valuator of (int -> int)
  | Fixed_return_reg
238
 
239
(* Raised by elt_width when asked for the width of a Cast element; carries
   the two element kinds of the cast.  *)
exception MixedMode of elts * elts
240
 
241
(* Width in bits of an element kind.  A Conv is accepted only when both
   sides have the same width (otherwise Failure); a Cast raises MixedMode;
   NoElts raises Failure.  *)
let rec elt_width elt =
  match elt with
  | B8 | I8 | P8 | U8 | S8 -> 8
  | B16 | I16 | P16 | U16 | S16 -> 16
  | B32 | I32 | U32 | F32 | S32 -> 32
  | B64 | I64 | U64 | S64 -> 64
  | NoElts -> failwith "No elts"
  | Cast (first, second) -> raise (MixedMode (first, second))
  | Conv (first, second) ->
      let first_width = elt_width first
      and second_width = elt_width second in
      if first_width <> second_width then failwith "element width?"
      else first_width
251
 
252
(* Class of an element kind, ignoring its width.  Conv and Cast elements
   yield a ConvClass built from the classes of their two components.  *)
let rec elt_class elt =
  match elt with
  | NoElts -> NoType
  | F32 -> Float
  | P8 | P16 -> Poly
  | S8 | S16 | S32 | S64 -> Signed
  | U8 | U16 | U32 | U64 -> Unsigned
  | I8 | I16 | I32 | I64 -> Int
  | B8 | B16 | B32 | B64 -> Bits
  | Conv (first, second) | Cast (first, second) ->
      ConvClass (elt_class first, elt_class second)
261
 
262
(* Inverse of elt_class/elt_width for plain element kinds: build the element
   of class C and width W.  Any combination with no matching element
   (including ConvClass and NoType) raises Failure.  *)
let elt_of_class_width c w =
  let bad () = failwith "Bad element type" in
  match c with
  | Signed ->
      (match w with
       | 8 -> S8 | 16 -> S16 | 32 -> S32 | 64 -> S64
       | _ -> bad ())
  | Unsigned ->
      (match w with
       | 8 -> U8 | 16 -> U16 | 32 -> U32 | 64 -> U64
       | _ -> bad ())
  | Int ->
      (match w with
       | 8 -> I8 | 16 -> I16 | 32 -> I32 | 64 -> I64
       | _ -> bad ())
  | Bits ->
      (match w with
       | 8 -> B8 | 16 -> B16 | 32 -> B32 | 64 -> B64
       | _ -> bad ())
  | Poly ->
      (match w with
       | 8 -> P8 | 16 -> P16
       | _ -> bad ())
  | Float ->
      (match w with
       | 32 -> F32
       | _ -> bad ())
  | ConvClass _ | NoType -> bad ()
284
 
285
(* Return unsigned integer element the same width as argument.  *)
286
let unsigned_of_elt elt =
  let width = elt_width elt in
  elt_of_class_width Unsigned width
288
 
289
(* Return signed integer element the same width as argument.  *)
let signed_of_elt elt =
  let width = elt_width elt in
  elt_of_class_width Signed width
291
 
292
(* Return untyped bits element the same width as argument.  *)
293
let bits_of_elt elt =
  let width = elt_width elt in
  elt_of_class_width Bits width
295
 
296
(* Map signed and unsigned integer elements to the Int element of the same
   width; every other element is returned unchanged.  *)
let non_signed_variant elt =
  match elt with
  | S8 | U8 -> I8
  | S16 | U16 -> I16
  | S32 | U32 -> I32
  | S64 | U64 -> I64
  | other -> other
306
 
307
(* Replace a polynomial element by the unsigned element of the same width;
   elements of any other class are rebuilt from their own class and width.  *)
let poly_unsigned_variant v =
  let cls =
    match elt_class v with
    | Poly -> Unsigned
    | other -> other
  in
  let width = elt_width v in
  elt_of_class_width cls width
312
 
313
(* Element of the same class with twice the width.  *)
let widen_elt elt =
  let width = elt_width elt in
  let cls = elt_class elt in
  elt_of_class_width cls (width * 2)
317
 
318
(* Element of the same class with half the width.  *)
let narrow_elt elt =
  let width = elt_width elt in
  let cls = elt_class elt in
  elt_of_class_width cls (width / 2)
322
 
323
(* If we're trying to find a mode from a "Use_operands" instruction, use the
324
   last vector operand as the dominant mode used to invoke the correct builtin.
325
   We must stick to this rule in neon.md.  *)
326
(* Scan OPERANDS from the last entry backwards and return Dreg or Qreg for
   the first D/Q register operand found (an array of D or Q registers counts
   as that register kind).
   Raises Invalid_argument if OPERANDS contains no such operand; previously
   the scan ran off the front of the array and failed with an uninformative
   "index out of bounds".  *)
let find_key_operand operands =
  let rec scan opno =
    if opno < 0 then
      invalid_arg "find_key_operand: no D or Q register operand"
    else
      match operands.(opno) with
      | Qreg | VecArray (_, Qreg) -> Qreg
      | Dreg | VecArray (_, Dreg) -> Dreg
      | _ -> scan (opno - 1)
  in
  scan (Array.length operands - 1)
336
 
337
(* Machine mode for element ELT under instruction shape SHAPE.  The element
   width picks the entry in a four-entry table (8, 16, 32, 64 bits); the
   shape picks which table.  Float elements (or conversions whose second
   class is Float) select the float mode in the 32-bit slot where one
   exists.  Shapes not listed raise Failure "invalid shape".  *)
let rec mode_of_elt elt shape =
  let flt =
    match elt_class elt with
    | Float | ConvClass (_, Float) -> true
    | _ -> false
  in
  let idx =
    match elt_width elt with
    | 8 -> 0
    | 16 -> 1
    | 32 -> 2
    | 64 -> 3
    | _ -> failwith "Bad element width"
  in
  let by_width modes = modes.(idx) in
  match shape with
  | Use_operands ops ->
      (* Defer to the key operand's register kind.  *)
      mode_of_elt elt (All (0, find_key_operand ops))
  | Narrow | Narrow_imm ->
      by_width [| V16QI; V8HI; V4SI; V2DI |]
  | Long | Wide | Wide_lane | Wide_scalar | Long_imm ->
      by_width [| V8QI; V4HI; V2SI; DI |]
  | All (_, (Corereg | PtrTo _ | CstPtrTo _)) ->
      by_width [| QI; HI; (if flt then SF else SI); DI |]
  | All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg
  | Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg ->
      by_width [| V16QI; V8HI; (if flt then V4SF else V4SI); V2DI |]
  | All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg
  | Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg ->
      by_width [| V8QI; V4HI; (if flt then V2SF else V2SI); DI |]
  | _ -> failwith "invalid shape"
359
 
360
(* Modify an element type dependent on the shape of the instruction and the
361
   operand number.  *)
362
 
363
(* Return the function that adjusts an element type for operand number NO of
   an instruction of shape SHAPE: widen, narrow or leave unchanged.  For the
   table-driven shapes an out-of-range NO raises Invalid_argument.  *)
let shapemap shape no =
  let same x = x in
  let nth table = table.(no) in
  match shape with
  | Narrow | Narrow_imm -> nth [| narrow_elt; same; same |]
  | Wide_lane -> nth [| widen_elt; same; same; same |]
  | Wide | Wide_noreg _ -> nth [| widen_elt; widen_elt; same |]
  | Long | Long_noreg _ | Wide_scalar | Long_imm ->
      nth [| widen_elt; same; same |]
  | All _ | Use_operands _ | By_scalar _ | Pair_result _ | Unary_scalar _
  | Binary_imm _ -> same
373
 
374
(* Register type (D/Q) of an operand, based on shape and operand number.  *)
375
 
376
(* Operand kind of operand number NO for shape SHAPE.  Shapes that carry a
   single kind for all operands ignore NO; the others index a per-shape
   table, so an out-of-range NO raises Invalid_argument.  *)
let regmap shape no =
  let nth regs = regs.(no) in
  match shape with
  | Use_operands these -> nth these
  | Narrow_imm -> nth [| Dreg; Qreg; Immed |]
  | Long_imm -> nth [| Qreg; Dreg; Immed |]
  | Binary_imm reg -> nth [| reg; reg; Immed |]
  | Pair_result reg -> nth [| VecArray (2, reg); reg; reg |]
  | Unary_scalar reg -> nth [| reg; Dreg; Immed |]
  | By_scalar reg -> nth [| reg; reg; Dreg; Immed |]
  | Wide_scalar -> nth [| Qreg; Dreg; Corereg |]
  | Wide_lane -> nth [| Qreg; Dreg; Dreg; Immed |]
  | Narrow -> nth [| Dreg; Qreg; Qreg |]
  | Wide -> nth [| Qreg; Qreg; Dreg |]
  | Long -> nth [| Qreg; Dreg; Dreg |]
  | All (_, reg) | Long_noreg reg | Wide_noreg reg -> reg
391
 
392
(* C-level type of operand number NO for an instruction of shape SHAPE with
   base element ELT.  The element is first adjusted by shapemap, the operand
   kind comes from regmap, and the two are combined.  Elements with no
   matching vector or scalar type raise Failure "Bad elt type".  *)
let type_for_elt shape elt no =
  let bad_elt () = failwith "Bad elt type" in
  (* Vector type held in a D register.  *)
  let dreg_type = function
    | S8 -> T_int8x8
    | S16 -> T_int16x4
    | S32 -> T_int32x2
    | S64 -> T_int64x1
    | U8 -> T_uint8x8
    | U16 -> T_uint16x4
    | U32 -> T_uint32x2
    | U64 -> T_uint64x1
    | F32 -> T_float32x2
    | P8 -> T_poly8x8
    | P16 -> T_poly16x4
    | _ -> bad_elt ()
  in
  (* Vector type held in a Q register.  *)
  let qreg_type = function
    | S8 -> T_int8x16
    | S16 -> T_int16x8
    | S32 -> T_int32x4
    | S64 -> T_int64x2
    | U8 -> T_uint8x16
    | U16 -> T_uint16x8
    | U32 -> T_uint32x4
    | U64 -> T_uint64x2
    | F32 -> T_float32x4
    | P8 -> T_poly8x16
    | P16 -> T_poly16x8
    | _ -> bad_elt ()
  in
  (* Scalar type held in a core register.  *)
  let core_type = function
    | S8 -> T_int8
    | S16 -> T_int16
    | S32 -> T_int32
    | S64 -> T_int64
    | U8 -> T_uint8
    | U16 -> T_uint16
    | U32 -> T_uint32
    | U64 -> T_uint64
    | P8 -> T_poly8
    | P16 -> T_poly16
    | F32 -> T_float32
    | _ -> bad_elt ()
  in
  let elt = (shapemap shape no) elt in
  let reg = regmap shape no in
  let rec vectype_of reg =
    match reg with
    | Dreg -> dreg_type elt
    | Qreg -> qreg_type elt
    | Corereg -> core_type elt
    | Immed -> T_immediate (0, 0)
    | VecArray (num, sub) -> T_arrayof (num, vectype_of sub)
    | PtrTo x -> T_ptrto (vectype_of x)
    | CstPtrTo x -> T_ptrto (T_const (vectype_of x))
    (* Anything else is solely for the use of the test generator.  *)
    | _ -> assert false
  in
  vectype_of reg
454
 
455
(* Return size of a vector type, in bits.  *)
456
let vectype_size vtype =
  match vtype with
  (* Doubleword vectors.  *)
  | T_int8x8 | T_uint8x8 | T_poly8x8
  | T_int16x4 | T_uint16x4 | T_poly16x4
  | T_int32x2 | T_uint32x2 | T_float32x2
  | T_int64x1 | T_uint64x1 -> 64
  (* Quadword vectors.  *)
  | T_int8x16 | T_uint8x16 | T_poly8x16
  | T_int16x8 | T_uint16x8 | T_poly16x8
  | T_int32x4 | T_uint32x4 | T_float32x4
  | T_int64x2 | T_uint64x2 -> 128
  (* Non-vector types have no size here.  *)
  | _ -> raise Not_found
464
 
465
(* Opaque integer mode large enough for an array of NUM vectors of type
   ELTTYPE, chosen by the total size in 32-bit words.  Raises Failure when no
   mode has that size, and Not_found (from vectype_size) when ELTTYPE is not
   a vector type.  *)
let inttype_for_array num elttype =
  let total_bits = num * vectype_size elttype in
  let numwords = total_bits / 32 in
  match numwords with
  | 4 -> B_TImode
  | 6 -> B_EImode
  | 8 -> B_OImode
  | 12 -> B_CImode
  | 16 -> B_XImode
  | _ -> failwith ("no int type for size " ^ string_of_int numwords)
475
 
476
(* These functions return pairs of (internal, external) types, where "internal"
477
   types are those seen by GCC, and "external" are those seen by the assembler.
478
   These types aren't necessarily the same, since the intrinsics can munge more
479
   than one C type into each assembler opcode.  *)
480
 
481
(* Wrap type-generating function FUNC so that the element it returns is
   replaced by its sign-less variant (see non_signed_variant).  *)
let make_sign_invariant func shape elt =
  match func shape elt with
  | arity, result_elt -> arity, non_signed_variant result_elt
484
 
485
(* Don't restrict any types.  *)
486
 
487
(* Build a prototype in which every operand uses element ELT.  MAKE_ARITY
   receives a function from operand number to type and returns the arity;
   the element is passed through unchanged.  *)
let elts_same make_arity shape elt =
  let operand_type = type_for_elt shape elt in
  make_arity operand_type, elt
490
 
491
(* As sign_invar_*, but when sign matters.  *)
492
let elts_same_io_lane =
  (* Operand 0 is used both as the result and as the first argument.  *)
  let mkarity vtype = Arity4 (vtype 0, vtype 0, vtype 1, vtype 2, vtype 3) in
  elts_same mkarity
494
 
495
let elts_same_io =
  (* Operand 0 is used both as the result and as the first argument.  *)
  let mkarity vtype = Arity3 (vtype 0, vtype 0, vtype 1, vtype 2) in
  elts_same mkarity
497
 
498
let elts_same_2_lane =
  (* Result plus three operands, all derived from the same element.  *)
  let mkarity vtype = Arity3 (vtype 0, vtype 1, vtype 2, vtype 3) in
  elts_same mkarity
500
 
501
(* Three-operand form; identical to elts_same_2_lane.  *)
let elts_same_3 shape elt = elts_same_2_lane shape elt
502
 
503
let elts_same_2 =
  (* Result plus two operands.  *)
  let mkarity vtype = Arity2 (vtype 0, vtype 1, vtype 2) in
  elts_same mkarity
505
 
506
let elts_same_1 =
  (* Result plus one operand.  *)
  let mkarity vtype = Arity1 (vtype 0, vtype 1) in
  elts_same mkarity
508
 
509
(* Use for signed/unsigned invariant operations (i.e. where the operation
510
   doesn't depend on the sign of the data).  *)
511
 
512
let sign_invar_io_lane shape elt =
  make_sign_invariant elts_same_io_lane shape elt

let sign_invar_io shape elt =
  make_sign_invariant elts_same_io shape elt

let sign_invar_2_lane shape elt =
  make_sign_invariant elts_same_2_lane shape elt

let sign_invar_2 shape elt =
  make_sign_invariant elts_same_2 shape elt

let sign_invar_1 shape elt =
  make_sign_invariant elts_same_1 shape elt
517
 
518
(* Sign-sensitive comparison.  *)
519
 
520
(* The result type is built from the unsigned element of the same width as
   ELT; the two operands use ELT itself.  *)
let cmp_sign_matters shape elt =
  let vtype = type_for_elt shape elt in
  let rtype = type_for_elt shape (unsigned_of_elt elt) 0 in
  Arity2 (rtype, vtype 1, vtype 2), elt
524
 
525
(* Signed/unsigned invariant comparison.  *)
526
 
527
(* Same prototype as cmp_sign_matters, but the returned element is made
   sign-less, and P8 is additionally mapped to I8.  *)
let cmp_sign_invar shape elt =
  let arity, compared_elt = cmp_sign_matters shape elt in
  let invariant_elt =
    match non_signed_variant compared_elt with
    | P8 -> I8
    | other -> other
  in
  arity, invariant_elt
535
 
536
(* Comparison (VTST) where only the element width matters.  *)
537
 
538
(* Result type uses the unsigned element of ELT's width, operands use ELT,
   and the returned element is the untyped-bits element of that width.  *)
let cmp_bits shape elt =
  let vtype = type_for_elt shape elt in
  let rtype = type_for_elt shape (unsigned_of_elt elt) 0 in
  let bits_only = bits_of_elt elt in
  Arity2 (rtype, vtype 1, vtype 2), bits_only
543
 
544
(* Shift by register: operand 2 (the shift count) always uses the signed
   element of ELT's width, whatever the signedness of ELT.  *)
let reg_shift shape elt =
  let vtype = type_for_elt shape elt in
  let op2type = type_for_elt shape (signed_of_elt elt) 2 in
  Arity2 (vtype 0, vtype 1, op2type), elt
548
 
549
(* Genericised constant-shift type-generating function.  *)
550
 
551
(* MKIMM turns the bit width of operand 2's element into the type of the
   immediate operand.  The optional RESULT function rewrites the element
   used for the result type; the optional ARITY function builds the arity
   from (result type, operand 1 type, immediate type), defaulting to Arity2
   of those three.  The element returned is always ELT.  *)
let const_shift mkimm ?arity ?result shape elt =
  let op2width = elt_width ((shapemap shape 2) elt) in
  let op2 = mkimm op2width in
  let op1 = type_for_elt shape elt 1 in
  let r_elt =
    match result with
    | Some restriction -> restriction elt
    | None -> elt
  in
  let rtype = type_for_elt shape r_elt 0 in
  let prototype =
    match arity with
    | Some mkarity -> mkarity rtype op1 op2
    | None -> Arity2 (rtype, op1, op2)
  in
  prototype, elt
564
 
565
(* Use for immediate right-shifts.  *)
566
 
567
let shift_right shape elt =
  (* Immediate is T_immediate (1, element width).  *)
  let mkimm width = T_immediate (1, width) in
  const_shift mkimm shape elt
569
 
570
(* As shift_right, but the destination also appears as the first argument.  *)
let shift_right_acc shape elt =
  let mkimm width = T_immediate (1, width) in
  let mkarity dst op1 op2 = Arity3 (dst, dst, op1, op2) in
  const_shift mkimm ~arity:mkarity shape elt
573
 
574
(* Use for immediate right-shifts when the operation doesn't care about
575
   signedness.  *)
576
 
577
let shift_right_sign_invar shape elt =
  make_sign_invariant shift_right shape elt
579
 
580
(* Immediate right-shift; result is unsigned even when operand is signed.  *)
581
 
582
let shift_right_to_uns shape elt =
  let mkimm width = T_immediate (1, width) in
  const_shift mkimm ~result:unsigned_of_elt shape elt
585
 
586
(* Immediate left-shift.  *)
587
 
588
let shift_left shape elt =
  (* Immediate is T_immediate (0, element width - 1).  *)
  let mkimm width = T_immediate (0, width - 1) in
  const_shift mkimm shape elt
590
 
591
(* Immediate left-shift, unsigned result.  *)
592
 
593
let shift_left_to_uns shape elt =
  let mkimm width = T_immediate (0, width - 1) in
  const_shift mkimm ~result:unsigned_of_elt shape elt
596
 
597
(* Immediate left-shift, don't care about signs.  *)
598
 
599
let shift_left_sign_invar shape elt =
  make_sign_invariant shift_left shape elt
601
 
602
(* Shift left/right and insert: only element size matters.  *)
603
 
604
let shift_insert shape elt =
  let mkimm width = T_immediate (1, width) in
  (* The destination also appears as the first argument.  *)
  let mkarity dst op1 op2 = Arity3 (dst, dst, op1, op2) in
  let arity, shifted_elt = const_shift mkimm ~arity:mkarity shape elt in
  arity, bits_of_elt shifted_elt
609
 
610
(* Get/set lane.  *)
611
 
612
let get_lane shape elt =
  let vtype = type_for_elt shape elt in
  (* Element reported alongside the prototype: polynomial elements become
     unsigned, and every 32-bit S/U/F element becomes B32.  *)
  let reported_elt =
    match elt with
    | P8 -> U8
    | P16 -> U16
    | S32 | U32 | F32 -> B32
    | other -> other
  in
  Arity2 (vtype 0, vtype 1, vtype 2), reported_elt
616
 
617
(* Three-operand prototype; the returned element is the untyped-bits element
   of ELT's width.  *)
let set_lane shape elt =
  let operand_type = type_for_elt shape elt in
  Arity3 (operand_type 0, operand_type 1, operand_type 2, operand_type 3),
    bits_of_elt elt
620
 
621
(* As set_lane, but the returned element is NoElts.  *)
let set_lane_notype shape elt =
  let operand_type = type_for_elt shape elt in
  Arity3 (operand_type 0, operand_type 1, operand_type 2, operand_type 3),
    NoElts
624
 
625
(* The single argument is always typed from U64 (operand 1); the result is
   typed from ELT.  *)
let create_vector shape elt =
  let argtype = type_for_elt shape U64 1 in
  let rtype = type_for_elt shape elt 0 in
  Arity1 (rtype, argtype), elt
629
 
630
(* Prototype for conversions.  ELT must be Conv (dest, src) or
   Cast (dest, src); the result type comes from dest and the operand types
   from src.  Any other element raises Failure.  *)
let conv make_arity shape elt =
  let edest, esrc =
    match elt with
    | Conv (dest, src) | Cast (dest, src) -> dest, src
    | _ -> failwith "Non-conversion element in conversion"
  in
  let vtype = type_for_elt shape esrc in
  let rtype = type_for_elt shape edest 0 in
  make_arity rtype vtype, elt
637
 
638
(* Conversion with one source operand.  *)
let conv_1 =
  let mkarity rtype vtype = Arity1 (rtype, vtype 1) in
  conv mkarity

(* Conversion with two operands.  *)
let conv_2 =
  let mkarity rtype vtype = Arity2 (rtype, vtype 1, vtype 2) in
  conv mkarity
640
 
641
(* Operation has an unsigned result even if operands are signed.  *)
642
 
643
(* MAKE_ARITY receives the result type (built from the unsigned element of
   ELT's width) and the operand-number-to-type function for ELT.  *)
let dst_unsign make_arity shape elt =
  let vtype = type_for_elt shape elt in
  let rtype = type_for_elt shape (unsigned_of_elt elt) 0 in
  make_arity rtype vtype, elt
647
 
648
(* One-operand form of dst_unsign.  *)
let dst_unsign_1 =
  let mkarity rtype vtype = Arity1 (rtype, vtype 1) in
  dst_unsign mkarity
649
 
650
(* Wrap FUNC so that the element it returns is replaced by the untyped-bits
   element of the same width.  *)
let make_bits_only func shape elt =
  match func shape elt with
  | arity, result_elt -> arity, bits_of_elt result_elt
653
 
654
(* Extend operation.  *)
655
 
656
(* Three-operand prototype with every type taken from ELT; the returned
   element is the untyped-bits element of ELT's width.  *)
let extend shape elt =
  let operand_type = type_for_elt shape elt in
  Arity3 (operand_type 0, operand_type 1, operand_type 2, operand_type 3),
    bits_of_elt elt
659
 
660
(* Table look-up operations. Operand 2 is signed/unsigned for signed/unsigned
661
   integer ops respectively, or unsigned for polynomial ops.  *)
662
 
663
(* MKARITY receives the operand-number-to-type function for ELT and the type
   of operand 2, which is built from poly_unsigned_variant ELT.  *)
let table mkarity shape elt =
  let operand_type = type_for_elt shape elt in
  let index_type = type_for_elt shape (poly_unsigned_variant elt) 2 in
  mkarity operand_type index_type, bits_of_elt elt
667
 
668
(* Table look-up with two operands.  *)
let table_2 =
  let mkarity vtype op2 = Arity2 (vtype 0, vtype 1, op2) in
  table mkarity

(* Table look-up where operand 0 is both result and first argument.  *)
let table_io =
  let mkarity vtype op2 = Arity3 (vtype 0, vtype 0, vtype 1, op2) in
  table mkarity
670
 
671
(* Operations where only bits matter.  *)
672
 
673
let bits_1 shape elt = make_bits_only elts_same_1 shape elt

let bits_2 shape elt = make_bits_only elts_same_2 shape elt

let bits_3 shape elt = make_bits_only elts_same_3 shape elt
676
 
677
(* Store insns.  *)
678
(* Void result; the two arguments are operands 0 and 1.  *)
let store_1 shape elt =
  let operand_type = type_for_elt shape elt in
  Arity2 (T_void, operand_type 0, operand_type 1), bits_of_elt elt
681
 
682
(* Void result; the three arguments are operands 0, 1 and 2.  *)
let store_3 shape elt =
  let operand_type = type_for_elt shape elt in
  Arity3 (T_void, operand_type 0, operand_type 1, operand_type 2),
    bits_of_elt elt
685
 
686
(* Wrap FUNC so that the element it returns is discarded and replaced by
   NoElts.  *)
let make_notype func shape elt =
  match func shape elt with
  | arity, _ -> arity, NoElts
689
 
690
let notype_1 shape elt = make_notype elts_same_1 shape elt

let notype_2 shape elt = make_notype elts_same_2 shape elt

let notype_3 shape elt = make_notype elts_same_3 shape elt
693
 
694
(* Bit-select operations (first operand is unsigned int).  *)
695
 
696
(* Operand 1 is typed from the unsigned element of ELT's width; the result
   and the other operands are typed from ELT.  Returns NoElts.  *)
let bit_select shape elt =
  let vtype = type_for_elt shape elt in
  let itype = type_for_elt shape (unsigned_of_elt elt) in
  Arity3 (vtype 0, itype 1, vtype 2, vtype 3), NoElts
700
 
701
(* Common lists of supported element types.  *)
702
 
703
(* Signed and unsigned integers, 8 to 32 bits.  *)
let su_8_32 = [S8; S16; S32; U8; U16; U32]

(* The same, with the 64-bit elements placed first.  *)
let su_8_64 = [S64; U64] @ su_8_32

(* Signed and unsigned integers, 16 to 64 bits.  *)
let su_16_64 = [S16; S32; S64; U16; U32; U64]

(* Polynomial and float elements followed by the integer lists above.  *)
let pf_su_8_32 = [P8; P16; F32] @ su_8_32

let pf_su_8_64 = [P8; P16; F32] @ su_8_64
708
 
709
let ops =
710
  [
711
    (* Addition.  *)
712
    Vadd, [], All (3, Dreg), "vadd", sign_invar_2, F32 :: su_8_64;
713
    Vadd, [], All (3, Qreg), "vaddQ", sign_invar_2, F32 :: su_8_64;
714
    Vadd, [], Long, "vaddl", elts_same_2, su_8_32;
715
    Vadd, [], Wide, "vaddw", elts_same_2, su_8_32;
716
    Vadd, [Halving], All (3, Dreg), "vhadd", elts_same_2, su_8_32;
717
    Vadd, [Halving], All (3, Qreg), "vhaddQ", elts_same_2, su_8_32;
718
    Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving],
719
      All (3, Dreg), "vRhadd", elts_same_2, su_8_32;
720
    Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving],
721
      All (3, Qreg), "vRhaddQ", elts_same_2, su_8_32;
722
    Vadd, [Saturating], All (3, Dreg), "vqadd", elts_same_2, su_8_64;
723
    Vadd, [Saturating], All (3, Qreg), "vqaddQ", elts_same_2, su_8_64;
724
    Vadd, [High_half], Narrow, "vaddhn", sign_invar_2, su_16_64;
725
    Vadd, [Instruction_name ["vraddhn"]; Rounding; High_half],
726
      Narrow, "vRaddhn", sign_invar_2, su_16_64;
727
 
728
    (* Multiplication.  *)
729
    Vmul, [], All (3, Dreg), "vmul", sign_invar_2, P8 :: F32 :: su_8_32;
730
    Vmul, [], All (3, Qreg), "vmulQ", sign_invar_2, P8 :: F32 :: su_8_32;
731
    Vmul, [Saturating; Doubling; High_half], All (3, Dreg), "vqdmulh",
732
      elts_same_2, [S16; S32];
733
    Vmul, [Saturating; Doubling; High_half], All (3, Qreg), "vqdmulhQ",
734
      elts_same_2, [S16; S32];
735
    Vmul,
736
      [Saturating; Rounding; Doubling; High_half;
737
       Instruction_name ["vqrdmulh"]],
738
      All (3, Dreg), "vqRdmulh",
739
      elts_same_2, [S16; S32];
740
    Vmul,
741
      [Saturating; Rounding; Doubling; High_half;
742
       Instruction_name ["vqrdmulh"]],
743
      All (3, Qreg), "vqRdmulhQ",
744
      elts_same_2, [S16; S32];
745
    Vmul, [], Long, "vmull", elts_same_2, P8 :: su_8_32;
746
    Vmul, [Saturating; Doubling], Long, "vqdmull", elts_same_2, [S16; S32];
747
 
748
    (* Multiply-accumulate. *)
749
    Vmla, [], All (3, Dreg), "vmla", sign_invar_io, F32 :: su_8_32;
750
    Vmla, [], All (3, Qreg), "vmlaQ", sign_invar_io, F32 :: su_8_32;
751
    Vmla, [], Long, "vmlal", elts_same_io, su_8_32;
752
    Vmla, [Saturating; Doubling], Long, "vqdmlal", elts_same_io, [S16; S32];
753
 
754
    (* Multiply-subtract.  *)
755
    Vmls, [], All (3, Dreg), "vmls", sign_invar_io, F32 :: su_8_32;
756
    Vmls, [], All (3, Qreg), "vmlsQ", sign_invar_io, F32 :: su_8_32;
757
    Vmls, [], Long, "vmlsl", elts_same_io, su_8_32;
758
    Vmls, [Saturating; Doubling], Long, "vqdmlsl", elts_same_io, [S16; S32];
759
 
760
    (* Subtraction.  *)
761
    Vsub, [], All (3, Dreg), "vsub", sign_invar_2, F32 :: su_8_64;
762
    Vsub, [], All (3, Qreg), "vsubQ", sign_invar_2, F32 :: su_8_64;
763
    Vsub, [], Long, "vsubl", elts_same_2, su_8_32;
764
    Vsub, [], Wide, "vsubw", elts_same_2, su_8_32;
765
    Vsub, [Halving], All (3, Dreg), "vhsub", elts_same_2, su_8_32;
766
    Vsub, [Halving], All (3, Qreg), "vhsubQ", elts_same_2, su_8_32;
767
    Vsub, [Saturating], All (3, Dreg), "vqsub", elts_same_2, su_8_64;
768
    Vsub, [Saturating], All (3, Qreg), "vqsubQ", elts_same_2, su_8_64;
769
    Vsub, [High_half], Narrow, "vsubhn", sign_invar_2, su_16_64;
770
    Vsub, [Instruction_name ["vrsubhn"]; Rounding; High_half],
771
      Narrow, "vRsubhn", sign_invar_2, su_16_64;
772
 
773
    (* Comparison, equal.  *)
774
    Vceq, [], All (3, Dreg), "vceq", cmp_sign_invar, P8 :: F32 :: su_8_32;
775
    Vceq, [], All (3, Qreg), "vceqQ", cmp_sign_invar, P8 :: F32 :: su_8_32;
776
 
777
    (* Comparison, greater-than or equal.  *)
778
    Vcge, [], All (3, Dreg), "vcge", cmp_sign_matters, F32 :: su_8_32;
779
    Vcge, [], All (3, Qreg), "vcgeQ", cmp_sign_matters, F32 :: su_8_32;
780
 
781
    (* Comparison, less-than or equal.  *)
782
    Vcle, [Flipped "vcge"], All (3, Dreg), "vcle", cmp_sign_matters,
783
      F32 :: su_8_32;
784
    Vcle, [Instruction_name ["vcge"]; Flipped "vcgeQ"],
785
      All (3, Qreg), "vcleQ", cmp_sign_matters,
786
      F32 :: su_8_32;
787
 
788
    (* Comparison, greater-than.  *)
789
    Vcgt, [], All (3, Dreg), "vcgt", cmp_sign_matters, F32 :: su_8_32;
790
    Vcgt, [], All (3, Qreg), "vcgtQ", cmp_sign_matters, F32 :: su_8_32;
791
 
792
    (* Comparison, less-than.  *)
793
    Vclt, [Flipped "vcgt"], All (3, Dreg), "vclt", cmp_sign_matters,
794
      F32 :: su_8_32;
795
    Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtQ"],
796
      All (3, Qreg), "vcltQ", cmp_sign_matters,
797
      F32 :: su_8_32;
798
 
799
    (* Compare absolute greater-than or equal.  *)
800
    Vcage, [Instruction_name ["vacge"]],
801
      All (3, Dreg), "vcage", cmp_sign_matters, [F32];
802
    Vcage, [Instruction_name ["vacge"]],
803
      All (3, Qreg), "vcageQ", cmp_sign_matters, [F32];
804
 
805
    (* Compare absolute less-than or equal.  *)
806
    Vcale, [Instruction_name ["vacge"]; Flipped "vcage"],
807
      All (3, Dreg), "vcale", cmp_sign_matters, [F32];
808
    Vcale, [Instruction_name ["vacge"]; Flipped "vcageQ"],
809
      All (3, Qreg), "vcaleQ", cmp_sign_matters, [F32];
810
 
811
    (* Compare absolute greater-than.  *)
812
    Vcagt, [Instruction_name ["vacgt"]],
813
      All (3, Dreg), "vcagt", cmp_sign_matters, [F32];
814
    Vcagt, [Instruction_name ["vacgt"]],
815
      All (3, Qreg), "vcagtQ", cmp_sign_matters, [F32];
816
 
817
    (* Compare absolute less-than.  *)
818
    Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagt"],
819
      All (3, Dreg), "vcalt", cmp_sign_matters, [F32];
820
    Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagtQ"],
821
      All (3, Qreg), "vcaltQ", cmp_sign_matters, [F32];
822
 
823
    (* Test bits.  *)
824
    Vtst, [], All (3, Dreg), "vtst", cmp_bits, P8 :: su_8_32;
825
    Vtst, [], All (3, Qreg), "vtstQ", cmp_bits, P8 :: su_8_32;
826
 
827
    (* Absolute difference.  *)
828
    Vabd, [], All (3, Dreg), "vabd", elts_same_2, F32 :: su_8_32;
829
    Vabd, [], All (3, Qreg), "vabdQ", elts_same_2, F32 :: su_8_32;
830
    Vabd, [], Long, "vabdl", elts_same_2, su_8_32;
831
 
832
    (* Absolute difference and accumulate.  *)
833
    Vaba, [], All (3, Dreg), "vaba", elts_same_io, su_8_32;
834
    Vaba, [], All (3, Qreg), "vabaQ", elts_same_io, su_8_32;
835
    Vaba, [], Long, "vabal", elts_same_io, su_8_32;
836
 
837
    (* Max.  *)
838
    Vmax, [], All (3, Dreg), "vmax", elts_same_2, F32 :: su_8_32;
839
    Vmax, [], All (3, Qreg), "vmaxQ", elts_same_2, F32 :: su_8_32;
840
 
841
    (* Min.  *)
842
    Vmin, [], All (3, Dreg), "vmin", elts_same_2, F32 :: su_8_32;
843
    Vmin, [], All (3, Qreg), "vminQ", elts_same_2, F32 :: su_8_32;
844
 
845
    (* Pairwise add.  *)
846
    Vpadd, [], All (3, Dreg), "vpadd", sign_invar_2, F32 :: su_8_32;
847
    Vpadd, [], Long_noreg Dreg, "vpaddl", elts_same_1, su_8_32;
848
    Vpadd, [], Long_noreg Qreg, "vpaddlQ", elts_same_1, su_8_32;
849
 
850
    (* Pairwise add, widen and accumulate.  *)
851
    Vpada, [], Wide_noreg Dreg, "vpadal", elts_same_2, su_8_32;
852
    Vpada, [], Wide_noreg Qreg, "vpadalQ", elts_same_2, su_8_32;
853
 
854
    (* Folding maximum, minimum.  *)
855
    Vpmax, [], All (3, Dreg), "vpmax", elts_same_2, F32 :: su_8_32;
856
    Vpmin, [], All (3, Dreg), "vpmin", elts_same_2, F32 :: su_8_32;
857
 
858
    (* Reciprocal step.  *)
859
    Vrecps, [], All (3, Dreg), "vrecps", elts_same_2, [F32];
860
    Vrecps, [], All (3, Qreg), "vrecpsQ", elts_same_2, [F32];
861
    Vrsqrts, [], All (3, Dreg), "vrsqrts", elts_same_2, [F32];
862
    Vrsqrts, [], All (3, Qreg), "vrsqrtsQ", elts_same_2, [F32];
863
 
864
    (* Vector shift left.  *)
865
    Vshl, [], All (3, Dreg), "vshl", reg_shift, su_8_64;
866
    Vshl, [], All (3, Qreg), "vshlQ", reg_shift, su_8_64;
867
    Vshl, [Instruction_name ["vrshl"]; Rounding],
868
      All (3, Dreg), "vRshl", reg_shift, su_8_64;
869
    Vshl, [Instruction_name ["vrshl"]; Rounding],
870
      All (3, Qreg), "vRshlQ", reg_shift, su_8_64;
871
    Vshl, [Saturating], All (3, Dreg), "vqshl", reg_shift, su_8_64;
872
    Vshl, [Saturating], All (3, Qreg), "vqshlQ", reg_shift, su_8_64;
873
    Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding],
874
      All (3, Dreg), "vqRshl", reg_shift, su_8_64;
875
    Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding],
876
      All (3, Qreg), "vqRshlQ", reg_shift, su_8_64;
877
 
878
    (* Vector shift right by constant.  *)
879
    Vshr_n, [], Binary_imm Dreg, "vshr_n", shift_right, su_8_64;
880
    Vshr_n, [], Binary_imm Qreg, "vshrQ_n", shift_right, su_8_64;
881
    Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Dreg,
882
      "vRshr_n", shift_right, su_8_64;
883
    Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Qreg,
884
      "vRshrQ_n", shift_right, su_8_64;
885
    Vshr_n, [], Narrow_imm, "vshrn_n", shift_right_sign_invar, su_16_64;
886
    Vshr_n, [Instruction_name ["vrshrn"]; Rounding], Narrow_imm, "vRshrn_n",
887
      shift_right_sign_invar, su_16_64;
888
    Vshr_n, [Saturating], Narrow_imm, "vqshrn_n", shift_right, su_16_64;
889
    Vshr_n, [Instruction_name ["vqrshrn"]; Saturating; Rounding], Narrow_imm,
890
      "vqRshrn_n", shift_right, su_16_64;
891
    Vshr_n, [Saturating; Dst_unsign], Narrow_imm, "vqshrun_n",
892
      shift_right_to_uns, [S16; S32; S64];
893
    Vshr_n, [Instruction_name ["vqrshrun"]; Saturating; Dst_unsign; Rounding],
894
      Narrow_imm, "vqRshrun_n", shift_right_to_uns, [S16; S32; S64];
895
 
896
    (* Vector shift left by constant.  *)
897
    Vshl_n, [], Binary_imm Dreg, "vshl_n", shift_left_sign_invar, su_8_64;
898
    Vshl_n, [], Binary_imm Qreg, "vshlQ_n", shift_left_sign_invar, su_8_64;
899
    Vshl_n, [Saturating], Binary_imm Dreg, "vqshl_n", shift_left, su_8_64;
900
    Vshl_n, [Saturating], Binary_imm Qreg, "vqshlQ_n", shift_left, su_8_64;
901
    Vshl_n, [Saturating; Dst_unsign], Binary_imm Dreg, "vqshlu_n",
902
      shift_left_to_uns, [S8; S16; S32; S64];
903
    Vshl_n, [Saturating; Dst_unsign], Binary_imm Qreg, "vqshluQ_n",
904
      shift_left_to_uns, [S8; S16; S32; S64];
905
    Vshl_n, [], Long_imm, "vshll_n", shift_left, su_8_32;
906
 
907
    (* Vector shift right by constant and accumulate.  *)
908
    Vsra_n, [], Binary_imm Dreg, "vsra_n", shift_right_acc, su_8_64;
909
    Vsra_n, [], Binary_imm Qreg, "vsraQ_n", shift_right_acc, su_8_64;
910
    Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Dreg,
911
      "vRsra_n", shift_right_acc, su_8_64;
912
    Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Qreg,
913
      "vRsraQ_n", shift_right_acc, su_8_64;
914
 
915
    (* Vector shift right and insert.  *)
916
    Vsri, [], Use_operands [| Dreg; Dreg; Immed |], "vsri_n", shift_insert,
917
      P8 :: P16 :: su_8_64;
918
    Vsri, [], Use_operands [| Qreg; Qreg; Immed |], "vsriQ_n", shift_insert,
919
      P8 :: P16 :: su_8_64;
920
 
921
    (* Vector shift left and insert.  *)
922
    Vsli, [], Use_operands [| Dreg; Dreg; Immed |], "vsli_n", shift_insert,
923
      P8 :: P16 :: su_8_64;
924
    Vsli, [], Use_operands [| Qreg; Qreg; Immed |], "vsliQ_n", shift_insert,
925
      P8 :: P16 :: su_8_64;
926
 
927
    (* Absolute value.  *)
928
    Vabs, [], All (2, Dreg), "vabs", elts_same_1, [S8; S16; S32; F32];
929
    Vabs, [], All (2, Qreg), "vabsQ", elts_same_1, [S8; S16; S32; F32];
930
    Vabs, [Saturating], All (2, Dreg), "vqabs", elts_same_1, [S8; S16; S32];
931
    Vabs, [Saturating], All (2, Qreg), "vqabsQ", elts_same_1, [S8; S16; S32];
932
 
933
    (* Negate.  *)
934
    Vneg, [], All (2, Dreg), "vneg", elts_same_1, [S8; S16; S32; F32];
935
    Vneg, [], All (2, Qreg), "vnegQ", elts_same_1, [S8; S16; S32; F32];
936
    Vneg, [Saturating], All (2, Dreg), "vqneg", elts_same_1, [S8; S16; S32];
937
    Vneg, [Saturating], All (2, Qreg), "vqnegQ", elts_same_1, [S8; S16; S32];
938
 
939
    (* Bitwise not.  *)
940
    Vmvn, [], All (2, Dreg), "vmvn", notype_1, P8 :: su_8_32;
941
    Vmvn, [], All (2, Qreg), "vmvnQ", notype_1, P8 :: su_8_32;
942
 
943
    (* Count leading sign bits.  *)
944
    Vcls, [], All (2, Dreg), "vcls", elts_same_1, [S8; S16; S32];
945
    Vcls, [], All (2, Qreg), "vclsQ", elts_same_1, [S8; S16; S32];
946
 
947
    (* Count leading zeros.  *)
948
    Vclz, [], All (2, Dreg), "vclz", sign_invar_1, su_8_32;
949
    Vclz, [], All (2, Qreg), "vclzQ", sign_invar_1, su_8_32;
950
 
951
    (* Count number of set bits.  *)
952
    Vcnt, [], All (2, Dreg), "vcnt", bits_1, [P8; S8; U8];
953
    Vcnt, [], All (2, Qreg), "vcntQ", bits_1, [P8; S8; U8];
954
 
955
    (* Reciprocal estimate.  *)
956
    Vrecpe, [], All (2, Dreg), "vrecpe", elts_same_1, [U32; F32];
957
    Vrecpe, [], All (2, Qreg), "vrecpeQ", elts_same_1, [U32; F32];
958
 
959
    (* Reciprocal square-root estimate.  *)
960
    Vrsqrte, [], All (2, Dreg), "vrsqrte", elts_same_1, [U32; F32];
961
    Vrsqrte, [], All (2, Qreg), "vrsqrteQ", elts_same_1, [U32; F32];
962
 
963
    (* Get lanes from a vector.  *)
964
    Vget_lane,
965
      [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]];
966
       Instruction_name ["vmov"]],
967
      Use_operands [| Corereg; Dreg; Immed |],
968
      "vget_lane", get_lane, pf_su_8_32;
969
    Vget_lane,
970
      [InfoWord;
971
       Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]];
972
       Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
973
      Use_operands [| Corereg; Dreg; Immed |],
974
      "vget_lane", notype_2, [S64; U64];
975
    Vget_lane,
976
      [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]];
977
       Instruction_name ["vmov"]],
978
      Use_operands [| Corereg; Qreg; Immed |],
979
      "vgetQ_lane", get_lane, pf_su_8_32;
980
    Vget_lane,
981
      [InfoWord;
982
       Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]];
983
       Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
984
      Use_operands [| Corereg; Qreg; Immed |],
985
      "vgetQ_lane", notype_2, [S64; U64];
986
 
987
    (* Set lanes in a vector.  *)
988
    Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]];
989
                Instruction_name ["vmov"]],
990
      Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane",
991
      set_lane, pf_su_8_32;
992
    Vset_lane, [Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]];
993
                Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
994
      Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane",
995
      set_lane_notype, [S64; U64];
996
    Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]];
997
                Instruction_name ["vmov"]],
998
      Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane",
999
      set_lane, pf_su_8_32;
1000
    Vset_lane, [Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]];
1001
                Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
1002
      Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane",
1003
      set_lane_notype, [S64; U64];
1004
 
1005
    (* Create vector from literal bit pattern.  *)
1006
    Vcreate,
1007
      [No_op], (* Not really, but it can yield various things that are too
1008
                  hard for the test generator at this time.  *)
1009
      Use_operands [| Dreg; Corereg |], "vcreate", create_vector,
1010
      pf_su_8_64;
1011
 
1012
    (* Set all lanes to the same value.  *)
1013
    Vdup_n,
1014
      [Disassembles_as [Use_operands [| Dreg;
1015
                                        Alternatives [ Corereg;
1016
                                                       Element_of_dreg ] |]]],
1017
      Use_operands [| Dreg; Corereg |], "vdup_n", bits_1,
1018
      pf_su_8_32;
1019
    Vdup_n,
1020
      [Instruction_name ["vmov"];
1021
       Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]],
1022
      Use_operands [| Dreg; Corereg |], "vdup_n", notype_1,
1023
      [S64; U64];
1024
    Vdup_n,
1025
      [Disassembles_as [Use_operands [| Qreg;
1026
                                        Alternatives [ Corereg;
1027
                                                       Element_of_dreg ] |]]],
1028
      Use_operands [| Qreg; Corereg |], "vdupQ_n", bits_1,
1029
      pf_su_8_32;
1030
    Vdup_n,
1031
      [Instruction_name ["vmov"];
1032
       Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |];
1033
                        Use_operands [| Dreg; Corereg; Corereg |]]],
1034
      Use_operands [| Qreg; Corereg |], "vdupQ_n", notype_1,
1035
      [S64; U64];
1036
 
1037
    (* These are just aliases for the above.  *)
1038
    Vmov_n,
1039
      [Builtin_name "vdup_n";
1040
       Disassembles_as [Use_operands [| Dreg;
1041
                                        Alternatives [ Corereg;
1042
                                                       Element_of_dreg ] |]]],
1043
      Use_operands [| Dreg; Corereg |],
1044
      "vmov_n", bits_1, pf_su_8_32;
1045
    Vmov_n,
1046
      [Builtin_name "vdup_n";
1047
       Instruction_name ["vmov"];
1048
       Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]],
1049
      Use_operands [| Dreg; Corereg |],
1050
      "vmov_n", notype_1, [S64; U64];
1051
    Vmov_n,
1052
      [Builtin_name "vdupQ_n";
1053
       Disassembles_as [Use_operands [| Qreg;
1054
                                        Alternatives [ Corereg;
1055
                                                       Element_of_dreg ] |]]],
1056
      Use_operands [| Qreg; Corereg |],
1057
      "vmovQ_n", bits_1, pf_su_8_32;
1058
    Vmov_n,
1059
      [Builtin_name "vdupQ_n";
1060
       Instruction_name ["vmov"];
1061
       Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |];
1062
                        Use_operands [| Dreg; Corereg; Corereg |]]],
1063
      Use_operands [| Qreg; Corereg |],
1064
      "vmovQ_n", notype_1, [S64; U64];
1065
 
1066
    (* Duplicate, lane version.  We can't use Use_operands here because the
1067
       rightmost register (always Dreg) would be picked up by find_key_operand,
1068
       when we want the leftmost register to be used in this case (otherwise
1069
       the modes are indistinguishable in neon.md, etc.).  *)
1070
    Vdup_lane,
1071
      [Disassembles_as [Use_operands [| Dreg; Element_of_dreg |]]],
1072
      Unary_scalar Dreg, "vdup_lane", bits_2, pf_su_8_32;
1073
    Vdup_lane,
1074
      [No_op; Const_valuator (fun _ -> 0)],
1075
      Unary_scalar Dreg, "vdup_lane", bits_2, [S64; U64];
1076
    Vdup_lane,
1077
      [Disassembles_as [Use_operands [| Qreg; Element_of_dreg |]]],
1078
      Unary_scalar Qreg, "vdupQ_lane", bits_2, pf_su_8_32;
1079
    Vdup_lane,
1080
      [No_op; Const_valuator (fun _ -> 0)],
1081
      Unary_scalar Qreg, "vdupQ_lane", bits_2, [S64; U64];
1082
 
1083
    (* Combining vectors.  *)
1084
    Vcombine, [No_op],
1085
      Use_operands [| Qreg; Dreg; Dreg |], "vcombine", notype_2,
1086
      pf_su_8_64;
1087
 
1088
    (* Splitting vectors.  *)
1089
    Vget_high, [No_op],
1090
      Use_operands [| Dreg; Qreg |], "vget_high",
1091
      notype_1, pf_su_8_64;
1092
    Vget_low, [Instruction_name ["vmov"];
1093
               Disassembles_as [Use_operands [| Dreg; Dreg |]];
1094
               Fixed_return_reg],
1095
      Use_operands [| Dreg; Qreg |], "vget_low",
1096
      notype_1, pf_su_8_32;
1097
     Vget_low, [No_op],
1098
      Use_operands [| Dreg; Qreg |], "vget_low",
1099
      notype_1, [S64; U64];
1100
 
1101
    (* Conversions.  *)
1102
    Vcvt, [InfoWord], All (2, Dreg), "vcvt", conv_1,
1103
      [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1104
    Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1,
1105
      [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1106
    Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2,
1107
      [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1108
    Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2,
1109
      [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1110
 
1111
    (* Move, narrowing.  *)
1112
    Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]],
1113
      Narrow, "vmovn", sign_invar_1, su_16_64;
1114
    Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating],
1115
      Narrow, "vqmovn", elts_same_1, su_16_64;
1116
    Vmovn,
1117
      [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating; Dst_unsign],
1118
      Narrow, "vqmovun", dst_unsign_1,
1119
      [S16; S32; S64];
1120
 
1121
    (* Move, long.  *)
1122
    Vmovl, [Disassembles_as [Use_operands [| Qreg; Dreg |]]],
1123
      Long, "vmovl", elts_same_1, su_8_32;
1124
 
1125
    (* Table lookup.  *)
1126
    Vtbl 1,
1127
      [Instruction_name ["vtbl"];
1128
       Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]],
1129
      Use_operands [| Dreg; Dreg; Dreg |], "vtbl1", table_2, [U8; S8; P8];
1130
    Vtbl 2, [Instruction_name ["vtbl"]],
1131
      Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbl2", table_2,
1132
      [U8; S8; P8];
1133
    Vtbl 3, [Instruction_name ["vtbl"]],
1134
      Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbl3", table_2,
1135
      [U8; S8; P8];
1136
    Vtbl 4, [Instruction_name ["vtbl"]],
1137
      Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbl4", table_2,
1138
      [U8; S8; P8];
1139
 
1140
    (* Extended table lookup.  *)
1141
    Vtbx 1,
1142
      [Instruction_name ["vtbx"];
1143
       Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]],
1144
      Use_operands [| Dreg; Dreg; Dreg |], "vtbx1", table_io, [U8; S8; P8];
1145
    Vtbx 2, [Instruction_name ["vtbx"]],
1146
      Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbx2", table_io,
1147
      [U8; S8; P8];
1148
    Vtbx 3, [Instruction_name ["vtbx"]],
1149
      Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbx3", table_io,
1150
      [U8; S8; P8];
1151
    Vtbx 4, [Instruction_name ["vtbx"]],
1152
      Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbx4", table_io,
1153
      [U8; S8; P8];
1154
 
1155
    (* Multiply, lane.  (note: these were undocumented at the time of
1156
       writing).  *)
1157
    Vmul_lane, [], By_scalar Dreg, "vmul_lane", sign_invar_2_lane,
1158
      [S16; S32; U16; U32; F32];
1159
    Vmul_lane, [], By_scalar Qreg, "vmulQ_lane", sign_invar_2_lane,
1160
      [S16; S32; U16; U32; F32];
1161
 
1162
    (* Multiply-accumulate, lane.  *)
1163
    Vmla_lane, [], By_scalar Dreg, "vmla_lane", sign_invar_io_lane,
1164
      [S16; S32; U16; U32; F32];
1165
    Vmla_lane, [], By_scalar Qreg, "vmlaQ_lane", sign_invar_io_lane,
1166
      [S16; S32; U16; U32; F32];
1167
    Vmla_lane, [], Wide_lane, "vmlal_lane", elts_same_io_lane,
1168
      [S16; S32; U16; U32];
1169
    Vmla_lane, [Saturating; Doubling], Wide_lane, "vqdmlal_lane",
1170
      elts_same_io_lane, [S16; S32];
1171
 
1172
    (* Multiply-subtract, lane.  *)
1173
    Vmls_lane, [], By_scalar Dreg, "vmls_lane", sign_invar_io_lane,
1174
      [S16; S32; U16; U32; F32];
1175
    Vmls_lane, [], By_scalar Qreg, "vmlsQ_lane", sign_invar_io_lane,
1176
      [S16; S32; U16; U32; F32];
1177
    Vmls_lane, [], Wide_lane, "vmlsl_lane", elts_same_io_lane,
1178
      [S16; S32; U16; U32];
1179
    Vmls_lane, [Saturating; Doubling], Wide_lane, "vqdmlsl_lane",
1180
      elts_same_io_lane, [S16; S32];
1181
 
1182
    (* Long multiply, lane.  *)
1183
    Vmull_lane, [],
1184
      Wide_lane, "vmull_lane", elts_same_2_lane, [S16; S32; U16; U32];
1185
 
1186
    (* Saturating doubling long multiply, lane.  *)
1187
    Vqdmull_lane, [Saturating; Doubling],
1188
      Wide_lane, "vqdmull_lane", elts_same_2_lane, [S16; S32];
1189
 
1190
    (* Saturating doubling long multiply high, lane.  *)
1191
    Vqdmulh_lane, [Saturating; Halving],
1192
      By_scalar Qreg, "vqdmulhQ_lane", elts_same_2_lane, [S16; S32];
1193
    Vqdmulh_lane, [Saturating; Halving],
1194
      By_scalar Dreg, "vqdmulh_lane", elts_same_2_lane, [S16; S32];
1195
    Vqdmulh_lane, [Saturating; Halving; Rounding;
1196
                   Instruction_name ["vqrdmulh"]],
1197
      By_scalar Qreg, "vqRdmulhQ_lane", elts_same_2_lane, [S16; S32];
1198
    Vqdmulh_lane, [Saturating; Halving; Rounding;
1199
                   Instruction_name ["vqrdmulh"]],
1200
      By_scalar Dreg, "vqRdmulh_lane", elts_same_2_lane, [S16; S32];
1201
 
1202
    (* Vector multiply by scalar.  *)
1203
    Vmul_n, [InfoWord;
1204
             Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1205
             Use_operands [| Dreg; Dreg; Corereg |], "vmul_n",
1206
      sign_invar_2, [S16; S32; U16; U32; F32];
1207
    Vmul_n, [InfoWord;
1208
             Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1209
             Use_operands [| Qreg; Qreg; Corereg |], "vmulQ_n",
1210
      sign_invar_2, [S16; S32; U16; U32; F32];
1211
 
1212
    (* Vector long multiply by scalar.  *)
1213
    Vmull_n, [Instruction_name ["vmull"];
1214
              Disassembles_as [Use_operands [| Qreg; Dreg; Element_of_dreg |]]],
1215
              Wide_scalar, "vmull_n",
1216
      elts_same_2, [S16; S32; U16; U32];
1217
 
1218
    (* Vector saturating doubling long multiply by scalar.  *)
1219
    Vqdmull_n, [Saturating; Doubling;
1220
                Disassembles_as [Use_operands [| Qreg; Dreg;
1221
                                                 Element_of_dreg |]]],
1222
                Wide_scalar, "vqdmull_n",
1223
      elts_same_2, [S16; S32];
1224
 
1225
    (* Vector saturating doubling long multiply high by scalar.  *)
1226
    Vqdmulh_n,
1227
      [Saturating; Halving; InfoWord;
1228
       Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1229
      Use_operands [| Qreg; Qreg; Corereg |],
1230
      "vqdmulhQ_n", elts_same_2, [S16; S32];
1231
    Vqdmulh_n,
1232
      [Saturating; Halving; InfoWord;
1233
       Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1234
      Use_operands [| Dreg; Dreg; Corereg |],
1235
      "vqdmulh_n", elts_same_2, [S16; S32];
1236
    Vqdmulh_n,
1237
      [Saturating; Halving; Rounding; InfoWord;
1238
       Instruction_name ["vqrdmulh"];
1239
       Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1240
      Use_operands [| Qreg; Qreg; Corereg |],
1241
      "vqRdmulhQ_n", elts_same_2, [S16; S32];
1242
    Vqdmulh_n,
1243
      [Saturating; Halving; Rounding; InfoWord;
1244
       Instruction_name ["vqrdmulh"];
1245
       Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1246
      Use_operands [| Dreg; Dreg; Corereg |],
1247
      "vqRdmulh_n", elts_same_2, [S16; S32];
1248
 
1249
    (* Vector multiply-accumulate by scalar.  *)
1250
    Vmla_n, [InfoWord;
1251
             Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1252
      Use_operands [| Dreg; Dreg; Corereg |], "vmla_n",
1253
      sign_invar_io, [S16; S32; U16; U32; F32];
1254
    Vmla_n, [InfoWord;
1255
             Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1256
      Use_operands [| Qreg; Qreg; Corereg |], "vmlaQ_n",
1257
      sign_invar_io, [S16; S32; U16; U32; F32];
1258
    Vmla_n, [], Wide_scalar, "vmlal_n", elts_same_io, [S16; S32; U16; U32];
1259
    Vmla_n, [Saturating; Doubling], Wide_scalar, "vqdmlal_n", elts_same_io,
1260
      [S16; S32];
1261
 
1262
    (* Vector multiply subtract by scalar.  *)
1263
    Vmls_n, [InfoWord;
1264
             Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1265
      Use_operands [| Dreg; Dreg; Corereg |], "vmls_n",
1266
      sign_invar_io, [S16; S32; U16; U32; F32];
1267
    Vmls_n, [InfoWord;
1268
             Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1269
      Use_operands [| Qreg; Qreg; Corereg |], "vmlsQ_n",
1270
      sign_invar_io, [S16; S32; U16; U32; F32];
1271
    Vmls_n, [], Wide_scalar, "vmlsl_n", elts_same_io, [S16; S32; U16; U32];
1272
    Vmls_n, [Saturating; Doubling], Wide_scalar, "vqdmlsl_n", elts_same_io,
1273
      [S16; S32];
1274
 
1275
    (* Vector extract.  *)
1276
    Vext, [Const_valuator (fun _ -> 0)],
1277
      Use_operands [| Dreg; Dreg; Dreg; Immed |], "vext", extend,
1278
      pf_su_8_64;
1279
    Vext, [Const_valuator (fun _ -> 0)],
1280
      Use_operands [| Qreg; Qreg; Qreg; Immed |], "vextQ", extend,
1281
      pf_su_8_64;
1282
 
1283
    (* Reverse elements.  *)
1284
    Vrev64, [], All (2, Dreg), "vrev64", bits_1, P8 :: P16 :: F32 :: su_8_32;
1285
    Vrev64, [], All (2, Qreg), "vrev64Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
1286
    Vrev32, [], All (2, Dreg), "vrev32", bits_1, [P8; P16; S8; U8; S16; U16];
1287
    Vrev32, [], All (2, Qreg), "vrev32Q", bits_1, [P8; P16; S8; U8; S16; U16];
1288
    Vrev16, [], All (2, Dreg), "vrev16", bits_1, [P8; S8; U8];
1289
    Vrev16, [], All (2, Qreg), "vrev16Q", bits_1, [P8; S8; U8];
1290
 
1291
    (* Bit selection.  *)
1292
    Vbsl,
1293
      [Instruction_name ["vbsl"; "vbit"; "vbif"];
1294
       Disassembles_as [Use_operands [| Dreg; Dreg; Dreg |]]],
1295
      Use_operands [| Dreg; Dreg; Dreg; Dreg |], "vbsl", bit_select,
1296
      pf_su_8_64;
1297
    Vbsl,
1298
      [Instruction_name ["vbsl"; "vbit"; "vbif"];
1299
       Disassembles_as [Use_operands [| Qreg; Qreg; Qreg |]]],
1300
      Use_operands [| Qreg; Qreg; Qreg; Qreg |], "vbslQ", bit_select,
1301
      pf_su_8_64;
1302
 
1303
    (* Transpose elements.  **NOTE** ReturnPtr goes some of the way towards
1304
       generating good code for intrinsics which return structure types --
1305
       builtins work well by themselves (and understand that the values being
1306
       stored on e.g. the stack also reside in registers, so can optimise the
1307
       stores away entirely if the results are used immediately), but
1308
       intrinsics are very much less efficient. Maybe something can be improved
1309
       re: inlining, or tweaking the ABI used for intrinsics (a special call
1310
       attribute?).
1311
    *)
1312
    Vtrn, [ReturnPtr], Pair_result Dreg, "vtrn", bits_2, pf_su_8_32;
1313
    Vtrn, [ReturnPtr], Pair_result Qreg, "vtrnQ", bits_2, pf_su_8_32;
1314
 
1315
    (* Zip elements.  *)
1316
    Vzip, [ReturnPtr], Pair_result Dreg, "vzip", bits_2, pf_su_8_32;
1317
    Vzip, [ReturnPtr], Pair_result Qreg, "vzipQ", bits_2, pf_su_8_32;
1318
 
1319
    (* Unzip elements.  *)
1320
    Vuzp, [ReturnPtr], Pair_result Dreg, "vuzp", bits_2, pf_su_8_32;
1321
    Vuzp, [ReturnPtr], Pair_result Qreg, "vuzpQ", bits_2, pf_su_8_32;
1322
 
1323
    (* Element/structure loads.  VLD1 variants.  *)
1324
    Vldx 1,
1325
      [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1326
                                        CstPtrTo Corereg |]]],
1327
      Use_operands [| Dreg; CstPtrTo Corereg |], "vld1", bits_1,
1328
      pf_su_8_64;
1329
    Vldx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1330
                                              CstPtrTo Corereg |]]],
1331
      Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q", bits_1,
1332
      pf_su_8_64;
1333
 
1334
    Vldx_lane 1,
1335
      [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1336
                                        CstPtrTo Corereg |]]],
1337
      Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |],
1338
      "vld1_lane", bits_3, pf_su_8_32;
1339
    Vldx_lane 1,
1340
      [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1341
                                        CstPtrTo Corereg |]];
1342
       Const_valuator (fun _ -> 0)],
1343
      Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |],
1344
      "vld1_lane", bits_3, [S64; U64];
1345
    Vldx_lane 1,
1346
      [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1347
                                        CstPtrTo Corereg |]]],
1348
      Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |],
1349
      "vld1Q_lane", bits_3, pf_su_8_32;
1350
    Vldx_lane 1,
1351
      [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1352
                                        CstPtrTo Corereg |]]],
1353
      Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |],
1354
      "vld1Q_lane", bits_3, [S64; U64];
1355
 
1356
    Vldx_dup 1,
1357
      [Disassembles_as [Use_operands [| VecArray (1, All_elements_of_dreg);
1358
                                        CstPtrTo Corereg |]]],
1359
      Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup",
1360
      bits_1, pf_su_8_32;
1361
    Vldx_dup 1,
1362
      [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1363
                                        CstPtrTo Corereg |]]],
1364
      Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup",
1365
      bits_1, [S64; U64];
1366
    Vldx_dup 1,
1367
      [Disassembles_as [Use_operands [| VecArray (2, All_elements_of_dreg);
1368
                                        CstPtrTo Corereg |]]],
1369
      Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup",
1370
      bits_1, pf_su_8_32;
1371
    Vldx_dup 1,
1372
      [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1373
                                        CstPtrTo Corereg |]]],
1374
      Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup",
1375
      bits_1, [S64; U64];
1376
 
1377
    (* VST1 variants.  *)
1378
    Vstx 1, [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1379
                                              PtrTo Corereg |]]],
1380
      Use_operands [| PtrTo Corereg; Dreg |], "vst1",
1381
      store_1, pf_su_8_64;
1382
    Vstx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1383
                                              PtrTo Corereg |]]],
1384
      Use_operands [| PtrTo Corereg; Qreg |], "vst1Q",
1385
      store_1, pf_su_8_64;
1386
 
1387
    Vstx_lane 1,
1388
      [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1389
                                        CstPtrTo Corereg |]]],
1390
      Use_operands [| PtrTo Corereg; Dreg; Immed |],
1391
      "vst1_lane", store_3, pf_su_8_32;
1392
    Vstx_lane 1,
1393
      [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1394
                                        CstPtrTo Corereg |]];
1395
       Const_valuator (fun _ -> 0)],
1396
      Use_operands [| PtrTo Corereg; Dreg; Immed |],
1397
      "vst1_lane", store_3, [U64; S64];
1398
    Vstx_lane 1,
1399
      [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1400
                                        CstPtrTo Corereg |]]],
1401
      Use_operands [| PtrTo Corereg; Qreg; Immed |],
1402
      "vst1Q_lane", store_3, pf_su_8_32;
1403
    Vstx_lane 1,
1404
      [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1405
                                        CstPtrTo Corereg |]]],
1406
      Use_operands [| PtrTo Corereg; Qreg; Immed |],
1407
      "vst1Q_lane", store_3, [U64; S64];
1408
 
1409
    (* VLD2 variants.  *)
1410
    Vldx 2, [], Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1411
      "vld2", bits_1, pf_su_8_32;
1412
    Vldx 2, [Instruction_name ["vld1"]],
1413
       Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1414
      "vld2", bits_1, [S64; U64];
1415
    Vldx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1416
                                              CstPtrTo Corereg |];
1417
                              Use_operands [| VecArray (2, Dreg);
1418
                                              CstPtrTo Corereg |]]],
1419
      Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg |],
1420
      "vld2Q", bits_1, pf_su_8_32;
1421
 
1422
    Vldx_lane 2,
1423
      [Disassembles_as [Use_operands
1424
        [| VecArray (2, Element_of_dreg);
1425
           CstPtrTo Corereg |]]],
1426
      Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg;
1427
                      VecArray (2, Dreg); Immed |],
1428
      "vld2_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
1429
    Vldx_lane 2,
1430
      [Disassembles_as [Use_operands
1431
        [| VecArray (2, Element_of_dreg);
1432
           CstPtrTo Corereg |]]],
1433
      Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg;
1434
                      VecArray (2, Qreg); Immed |],
1435
      "vld2Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
1436
 
1437
    Vldx_dup 2,
1438
      [Disassembles_as [Use_operands
1439
        [| VecArray (2, All_elements_of_dreg); CstPtrTo Corereg |]]],
1440
      Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1441
      "vld2_dup", bits_1, pf_su_8_32;
1442
    Vldx_dup 2,
1443
      [Instruction_name ["vld1"]; Disassembles_as [Use_operands
1444
        [| VecArray (2, Dreg); CstPtrTo Corereg |]]],
1445
      Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1446
      "vld2_dup", bits_1, [S64; U64];
1447
 
1448
    (* VST2 variants.  *)
1449
    Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1450
                                              PtrTo Corereg |]]],
1451
      Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2",
1452
      store_1, pf_su_8_32;
1453
    Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1454
                                              PtrTo Corereg |]];
1455
             Instruction_name ["vst1"]],
1456
      Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2",
1457
      store_1, [S64; U64];
1458
    Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1459
                                              PtrTo Corereg |];
1460
                              Use_operands [| VecArray (2, Dreg);
1461
                                              PtrTo Corereg |]]],
1462
      Use_operands [| PtrTo Corereg; VecArray (2, Qreg) |], "vst2Q",
1463
      store_1, pf_su_8_32;
1464
 
1465
    Vstx_lane 2,
1466
      [Disassembles_as [Use_operands
1467
        [| VecArray (2, Element_of_dreg);
1468
           CstPtrTo Corereg |]]],
1469
      Use_operands [| PtrTo Corereg; VecArray (2, Dreg); Immed |], "vst2_lane",
1470
      store_3, P8 :: P16 :: F32 :: su_8_32;
1471
    Vstx_lane 2,
1472
      [Disassembles_as [Use_operands
1473
        [| VecArray (2, Element_of_dreg);
1474
           CstPtrTo Corereg |]]],
1475
      Use_operands [| PtrTo Corereg; VecArray (2, Qreg); Immed |], "vst2Q_lane",
1476
      store_3, [P16; F32; U16; U32; S16; S32];
1477
 
1478
    (* VLD3 variants.  *)
1479
    Vldx 3, [], Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1480
      "vld3", bits_1, pf_su_8_32;
1481
    Vldx 3, [Instruction_name ["vld1"]],
1482
      Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1483
      "vld3", bits_1, [S64; U64];
1484
    Vldx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg);
1485
                                              CstPtrTo Corereg |];
1486
                              Use_operands [| VecArray (3, Dreg);
1487
                                              CstPtrTo Corereg |]]],
1488
      Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg |],
1489
      "vld3Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
1490
 
1491
    Vldx_lane 3,
1492
      [Disassembles_as [Use_operands
1493
        [| VecArray (3, Element_of_dreg);
1494
           CstPtrTo Corereg |]]],
1495
      Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg;
1496
                                     VecArray (3, Dreg); Immed |],
1497
      "vld3_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
1498
    Vldx_lane 3,
1499
      [Disassembles_as [Use_operands
1500
        [| VecArray (3, Element_of_dreg);
1501
           CstPtrTo Corereg |]]],
1502
      Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg;
1503
                                     VecArray (3, Qreg); Immed |],
1504
      "vld3Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
1505
 
1506
    Vldx_dup 3,
1507
      [Disassembles_as [Use_operands
1508
        [| VecArray (3, All_elements_of_dreg); CstPtrTo Corereg |]]],
1509
      Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1510
      "vld3_dup", bits_1, pf_su_8_32;
1511
    Vldx_dup 3,
1512
      [Instruction_name ["vld1"]; Disassembles_as [Use_operands
1513
        [| VecArray (3, Dreg); CstPtrTo Corereg |]]],
1514
      Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1515
      "vld3_dup", bits_1, [S64; U64];
1516
 
1517
    (* VST3 variants.  *)
1518
    Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1519
                                              PtrTo Corereg |]]],
1520
      Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3",
1521
      store_1, pf_su_8_32;
1522
    Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1523
                                              PtrTo Corereg |]];
1524
             Instruction_name ["vst1"]],
1525
      Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3",
1526
      store_1, [S64; U64];
1527
    Vstx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg);
1528
                                              PtrTo Corereg |];
1529
                              Use_operands [| VecArray (3, Dreg);
1530
                                              PtrTo Corereg |]]],
1531
      Use_operands [| PtrTo Corereg; VecArray (3, Qreg) |], "vst3Q",
1532
      store_1, pf_su_8_32;
1533
 
1534
    Vstx_lane 3,
1535
      [Disassembles_as [Use_operands
1536
        [| VecArray (3, Element_of_dreg);
1537
           CstPtrTo Corereg |]]],
1538
      Use_operands [| PtrTo Corereg; VecArray (3, Dreg); Immed |], "vst3_lane",
1539
      store_3, P8 :: P16 :: F32 :: su_8_32;
1540
    Vstx_lane 3,
1541
      [Disassembles_as [Use_operands
1542
        [| VecArray (3, Element_of_dreg);
1543
           CstPtrTo Corereg |]]],
1544
      Use_operands [| PtrTo Corereg; VecArray (3, Qreg); Immed |], "vst3Q_lane",
1545
      store_3, [P16; F32; U16; U32; S16; S32];
1546
 
1547
    (* VLD4/VST4 variants.  *)
1548
    Vldx 4, [], Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1549
      "vld4", bits_1, pf_su_8_32;
1550
    Vldx 4, [Instruction_name ["vld1"]],
1551
      Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1552
      "vld4", bits_1, [S64; U64];
1553
    Vldx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1554
                                              CstPtrTo Corereg |];
1555
                              Use_operands [| VecArray (4, Dreg);
1556
                                              CstPtrTo Corereg |]]],
1557
      Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg |],
1558
      "vld4Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
1559
 
1560
    Vldx_lane 4,
1561
      [Disassembles_as [Use_operands
1562
        [| VecArray (4, Element_of_dreg);
1563
           CstPtrTo Corereg |]]],
1564
      Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg;
1565
                                     VecArray (4, Dreg); Immed |],
1566
      "vld4_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
1567
    Vldx_lane 4,
1568
      [Disassembles_as [Use_operands
1569
        [| VecArray (4, Element_of_dreg);
1570
           CstPtrTo Corereg |]]],
1571
      Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg;
1572
                      VecArray (4, Qreg); Immed |],
1573
      "vld4Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
1574
 
1575
    Vldx_dup 4,
1576
      [Disassembles_as [Use_operands
1577
        [| VecArray (4, All_elements_of_dreg); CstPtrTo Corereg |]]],
1578
      Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1579
      "vld4_dup", bits_1, pf_su_8_32;
1580
    Vldx_dup 4,
1581
      [Instruction_name ["vld1"]; Disassembles_as [Use_operands
1582
        [| VecArray (4, Dreg); CstPtrTo Corereg |]]],
1583
      Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1584
      "vld4_dup", bits_1, [S64; U64];
1585
 
1586
    Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1587
                                              PtrTo Corereg |]]],
1588
      Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4",
1589
      store_1, pf_su_8_32;
1590
    Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1591
                                              PtrTo Corereg |]];
1592
             Instruction_name ["vst1"]],
1593
      Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4",
1594
      store_1, [S64; U64];
1595
    Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1596
                                              PtrTo Corereg |];
1597
                              Use_operands [| VecArray (4, Dreg);
1598
                                              PtrTo Corereg |]]],
1599
     Use_operands [| PtrTo Corereg; VecArray (4, Qreg) |], "vst4Q",
1600
      store_1, pf_su_8_32;
1601
 
1602
    Vstx_lane 4,
1603
      [Disassembles_as [Use_operands
1604
        [| VecArray (4, Element_of_dreg);
1605
           CstPtrTo Corereg |]]],
1606
      Use_operands [| PtrTo Corereg; VecArray (4, Dreg); Immed |], "vst4_lane",
1607
      store_3, P8 :: P16 :: F32 :: su_8_32;
1608
    Vstx_lane 4,
1609
      [Disassembles_as [Use_operands
1610
        [| VecArray (4, Element_of_dreg);
1611
           CstPtrTo Corereg |]]],
1612
      Use_operands [| PtrTo Corereg; VecArray (4, Qreg); Immed |], "vst4Q_lane",
1613
      store_3, [P16; F32; U16; U32; S16; S32];
1614
 
1615
    (* Logical operations. And.  *)
1616
    Vand, [], All (3, Dreg), "vand", notype_2, su_8_64;
1617
    Vand, [], All (3, Qreg), "vandQ", notype_2, su_8_64;
1618
 
1619
    (* Or.  *)
1620
    Vorr, [], All (3, Dreg), "vorr", notype_2, su_8_64;
1621
    Vorr, [], All (3, Qreg), "vorrQ", notype_2, su_8_64;
1622
 
1623
    (* Eor.  *)
1624
    Veor, [], All (3, Dreg), "veor", notype_2, su_8_64;
1625
    Veor, [], All (3, Qreg), "veorQ", notype_2, su_8_64;
1626
 
1627
    (* Bic (And-not).  *)
1628
    Vbic, [], All (3, Dreg), "vbic", notype_2, su_8_64;
1629
    Vbic, [], All (3, Qreg), "vbicQ", notype_2, su_8_64;
1630
 
1631
    (* Or-not.  *)
1632
    Vorn, [], All (3, Dreg), "vorn", notype_2, su_8_64;
1633
    Vorn, [], All (3, Qreg), "vornQ", notype_2, su_8_64;
1634
  ]
1635
 
1636
(* Table entries for the vreinterpret intrinsics.  For every element type
   taken as the conversion target, two entries are produced (one with Dreg
   operands, then one with Qreg operands).  Both share a type list holding
   Cast (target, source) for every other element type, in the order of
   the element list.  *)
let reinterp =
  let elems = P8 :: P16 :: F32 :: su_8_64 in
  (* Every cast into TARGET from a different element type.  *)
  let casts_into target =
    List.map
      (fun source -> Cast (target, source))
      (List.filter (fun source -> source <> target) elems) in
  (* The D-form and Q-form entries for one target type.  *)
  let entries_for target =
    let types = casts_into target in
    [ Vreinterp, [No_op], Use_operands [| Dreg; Dreg |],
        "vreinterpret", conv_1, types;
      Vreinterp, [No_op], Use_operands [| Qreg; Qreg |],
        "vreinterpretQ", conv_1, types ] in
  List.concat (List.map entries_for elems)
1656
 
1657
(* Output routines.  *)
1658
 
1659
(* Textual suffix for an element type.  Conv and Cast pairs are rendered
   as the two component suffixes joined by an underscore.  Fails with
   "No elts" when handed NoElts (also when nested inside a pair).  *)
let rec string_of_elt elt =
  match elt with
  | NoElts -> failwith "No elts"
  | Conv (first, second) | Cast (first, second) ->
      String.concat "_" [string_of_elt first; string_of_elt second]
  | S8 -> "s8"
  | S16 -> "s16"
  | S32 -> "s32"
  | S64 -> "s64"
  | U8 -> "u8"
  | U16 -> "u16"
  | U32 -> "u32"
  | U64 -> "u64"
  | I8 -> "i8"
  | I16 -> "i16"
  | I32 -> "i32"
  | I64 -> "i64"
  | B8 -> "8"
  | B16 -> "16"
  | B32 -> "32"
  | B64 -> "64"
  | F32 -> "f32"
  | P8 -> "p8"
  | P16 -> "p16"
1667
 
1668
(* Like string_of_elt, except that the two halves of a top-level Conv or
   Cast pair are separated by a dot instead of an underscore.  *)
let string_of_elt_dots = function
  | Conv (lhs, rhs) | Cast (lhs, rhs) ->
      String.concat "." [string_of_elt lhs; string_of_elt rhs]
  | plain -> string_of_elt plain
1672
 
1673
(* Render a vectype as a type-name string.

   Vector and scalar element types get a "_t" suffix.  The immediate,
   void and __builtin_neon_* types are emitted verbatim.  For an array
   the suffix is applied once to "<base>x<count>", with the base rendered
   without a suffix.  Pointer and const wrappers render their inner type
   with the suffix and then add " *" after it or "const " before it.  *)
let string_of_vectype vt =
  let with_t stem = stem ^ "_t" in
  let rec render decorate ty =
    match ty with
    (* Vector types.  *)
    | T_int8x8 -> decorate "int8x8"
    | T_int8x16 -> decorate "int8x16"
    | T_int16x4 -> decorate "int16x4"
    | T_int16x8 -> decorate "int16x8"
    | T_int32x2 -> decorate "int32x2"
    | T_int32x4 -> decorate "int32x4"
    | T_int64x1 -> decorate "int64x1"
    | T_int64x2 -> decorate "int64x2"
    | T_uint8x8 -> decorate "uint8x8"
    | T_uint8x16 -> decorate "uint8x16"
    | T_uint16x4 -> decorate "uint16x4"
    | T_uint16x8 -> decorate "uint16x8"
    | T_uint32x2 -> decorate "uint32x2"
    | T_uint32x4 -> decorate "uint32x4"
    | T_uint64x1 -> decorate "uint64x1"
    | T_uint64x2 -> decorate "uint64x2"
    | T_float32x2 -> decorate "float32x2"
    | T_float32x4 -> decorate "float32x4"
    | T_poly8x8 -> decorate "poly8x8"
    | T_poly8x16 -> decorate "poly8x16"
    | T_poly16x4 -> decorate "poly16x4"
    | T_poly16x8 -> decorate "poly16x8"
    (* Scalar element types.  *)
    | T_int8 -> decorate "int8"
    | T_int16 -> decorate "int16"
    | T_int32 -> decorate "int32"
    | T_int64 -> decorate "int64"
    | T_uint8 -> decorate "uint8"
    | T_uint16 -> decorate "uint16"
    | T_uint32 -> decorate "uint32"
    | T_uint64 -> decorate "uint64"
    | T_poly8 -> decorate "poly8"
    | T_poly16 -> decorate "poly16"
    | T_float32 -> decorate "float32"
    (* Names that never take the suffix.  *)
    | T_immediate _ -> "const int"
    | T_void -> "void"
    | T_intQI -> "__builtin_neon_qi"
    | T_intHI -> "__builtin_neon_hi"
    | T_intSI -> "__builtin_neon_si"
    | T_intDI -> "__builtin_neon_di"
    | T_floatSF -> "__builtin_neon_sf"
    (* Compound types.  *)
    | T_arrayof (count, elem) ->
        let stem = render (fun bare -> bare) elem in
        decorate (stem ^ "x" ^ string_of_int count)
    | T_ptrto target -> render decorate target ^ " *"
    | T_const target -> "const " ^ render decorate target
  in
  render with_t vt
1726
 
1727
(* Name of the __builtin_neon_* type corresponding to an inttype.  *)
let string_of_inttype inttype =
  match inttype with
  | B_TImode -> "__builtin_neon_ti"
  | B_EImode -> "__builtin_neon_ei"
  | B_OImode -> "__builtin_neon_oi"
  | B_CImode -> "__builtin_neon_ci"
  | B_XImode -> "__builtin_neon_xi"
1733
 
1734
(* Lower-case spelling of a mode constructor.  *)
let string_of_mode mode =
  match mode with
  | V8QI -> "v8qi"
  | V4HI -> "v4hi"
  | V2SI -> "v2si"
  | V2SF -> "v2sf"
  | DI -> "di"
  | V16QI -> "v16qi"
  | V8HI -> "v8hi"
  | V4SI -> "v4si"
  | V4SF -> "v4sf"
  | V2DI -> "v2di"
  | QI -> "qi"
  | HI -> "hi"
  | SI -> "si"
  | SF -> "sf"
1739
 
1740
(* Use uppercase chars for letters which form part of the intrinsic name, but
   should be omitted from the builtin name (the info is passed in an extra
   argument, instead).

   The intrinsic name is the table name folded to lower case.
   String.lowercase_ascii is used because String.lowercase is deprecated
   and no longer available in current OCaml releases; the two agree on
   ASCII input, which is all the names in the table use.  *)
let intrinsic_name name = String.lowercase_ascii name
1744
 
1745
(* Compute the builtin name for a table entry.

   The name may be overridden by a Flipped or Builtin_name entry in the
   features list; when several are present, the one nearest the head of
   the list wins (fold_right visits the list back to front, so the
   leftmost override is applied last).  Every ASCII uppercase letter is
   then removed from the chosen name; all other characters are kept.

   Char.lowercase_ascii replaces the deprecated String.lowercase, which
   is no longer available in current OCaml releases, and avoids building
   a one-character string for every character tested.  *)
let builtin_name features name =
  let chosen =
    List.fold_right
      (fun feature current ->
        match feature with
        | Flipped override | Builtin_name override -> override
        | _ -> current)
      features name in
  (* A character is kept when lower-casing leaves it unchanged.  *)
  let keep c = Char.lowercase_ascii c = c in
  let buf = Buffer.create (String.length chosen) in
  String.iter (fun c -> if keep c then Buffer.add_char buf c) chosen;
  Buffer.contents buf
1758
 
1759
(* Turn an arity into the list of type-name strings of the vectypes it
   carries, in the order they appear in the constructor.  *)
let strings_of_arity a =
  let vectypes =
    match a with
    | Arity0 t0 -> [t0]
    | Arity1 (t0, t1) -> [t0; t1]
    | Arity2 (t0, t1, t2) -> [t0; t1; t2]
    | Arity3 (t0, t1, t2, t3) -> [t0; t1; t2; t3]
    | Arity4 (t0, t1, t2, t3, t4) -> [t0; t1; t2; t3; t4]
  in
  List.map string_of_vectype vectypes
1776
 
1777
(* Trailing components removed from a builtin name to obtain the
   instruction name.  A suffix is only removed when it directly follows
   an underscore, and the underscore is removed with it.  *)
let suffixes_to_strip = ["n"; "lane"; "dup"]
1781
 
1782
(* Get the possible instruction names for a "name" from the ops table.

   The candidates come from the first Instruction_name or Flipped entry
   in the features list, whichever appears earlier.  If neither is
   present, the single candidate is the builtin name computed by
   builtin_name.  Each candidate then has the text after its last
   underscore removed, together with that underscore, when that text is
   listed in suffixes_to_strip.  Candidates with no underscore are
   returned unchanged.  *)
let get_insn_names features name =
  (* Candidate names before suffix stripping.  *)
  let rec candidates = function
    | Instruction_name insns :: _ -> insns
    | Flipped flipped :: _ -> [flipped]
    | _ :: rest -> candidates rest
    | [] -> [builtin_name features name]
  in
  (* Remove a recognised "_suffix" from a single name.  *)
  let drop_suffix insn =
    try
      let cut = String.rindex insn '_' in
      let tail = String.sub insn (cut + 1) (String.length insn - cut - 1) in
      if List.mem tail suffixes_to_strip then String.sub insn 0 cut
      else insn
    with Not_found | Invalid_argument _ -> insn
  in
  List.map drop_suffix (candidates features)
1819
 
1820
(* Apply F to each element of ELTS, join the resulting strings with
   ", " and append the result to ACC.  An empty ELTS yields ACC
   unchanged.

   String.concat builds the joined string in one pass instead of
   re-copying the accumulator for every element.  List.rev_map keeps
   the traversal tail-recursive and applies F to the elements from left
   to right.  *)
let commas f elts acc =
  acc ^ String.concat ", " (List.rev (List.rev_map f elts))
1828
 
1829
(* Given a list of features and the shape from the "ops" table, apply F
   to each shape the instruction may have and return the results in a
   list.  When the features contain a Disassembles_as entry, F is mapped
   over the shapes of the first such entry; otherwise the result is the
   single-element list [F SHAPE].

   NOTE(review): a Not_found escaping from F while the Disassembles_as
   shapes are being mapped is caught and answered with [F SHAPE], exactly
   as in the previous formulation.  Confirm whether that fallback is
   intended.  *)
let analyze_all_shapes features shape f =
  let overrides =
    List.filter
      (function Disassembles_as _ -> true | _ -> false)
      features in
  match overrides with
  | Disassembles_as shapes :: _ ->
      (try List.map f shapes with Not_found -> [f shape])
  | _ -> [f shape]
1844
 

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.