OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [gcc/] [config/] [i386/] [sse.md] - Blame information for rev 709

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 709 jeremybenn
;; GCC machine description for SSE instructions
2
;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3
;; Free Software Foundation, Inc.
4
;;
5
;; This file is part of GCC.
6
;;
7
;; GCC is free software; you can redistribute it and/or modify
8
;; it under the terms of the GNU General Public License as published by
9
;; the Free Software Foundation; either version 3, or (at your option)
10
;; any later version.
11
;;
12
;; GCC is distributed in the hope that it will be useful,
13
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
;; GNU General Public License for more details.
16
;;
17
;; You should have received a copy of the GNU General Public License
18
;; along with GCC; see the file COPYING3.  If not see
19
;; <http://www.gnu.org/licenses/>.
20
 
21
(define_c_enum "unspec" [
22
  ;; SSE
23
  UNSPEC_MOVNT
24
  UNSPEC_MOVU
25
 
26
  ;; SSE3
27
  UNSPEC_LDDQU
28
 
29
  ;; SSSE3
30
  UNSPEC_PSHUFB
31
  UNSPEC_PSIGN
32
  UNSPEC_PALIGNR
33
 
34
  ;; For SSE4A support
35
  UNSPEC_EXTRQI
36
  UNSPEC_EXTRQ
37
  UNSPEC_INSERTQI
38
  UNSPEC_INSERTQ
39
 
40
  ;; For SSE4.1 support
41
  UNSPEC_BLENDV
42
  UNSPEC_INSERTPS
43
  UNSPEC_DP
44
  UNSPEC_MOVNTDQA
45
  UNSPEC_MPSADBW
46
  UNSPEC_PHMINPOSUW
47
  UNSPEC_PTEST
48
 
49
  ;; For SSE4.2 support
50
  UNSPEC_PCMPESTR
51
  UNSPEC_PCMPISTR
52
 
53
  ;; For FMA4 support
54
  UNSPEC_FMADDSUB
55
  UNSPEC_XOP_UNSIGNED_CMP
56
  UNSPEC_XOP_TRUEFALSE
57
  UNSPEC_XOP_PERMUTE
58
  UNSPEC_FRCZ
59
 
60
  ;; For AES support
61
  UNSPEC_AESENC
62
  UNSPEC_AESENCLAST
63
  UNSPEC_AESDEC
64
  UNSPEC_AESDECLAST
65
  UNSPEC_AESIMC
66
  UNSPEC_AESKEYGENASSIST
67
 
68
  ;; For PCLMUL support
69
  UNSPEC_PCLMUL
70
 
71
  ;; For AVX support
72
  UNSPEC_PCMP
73
  UNSPEC_VPERMIL
74
  UNSPEC_VPERMIL2
75
  UNSPEC_VPERMIL2F128
76
  UNSPEC_CAST
77
  UNSPEC_VTESTP
78
  UNSPEC_VCVTPH2PS
79
  UNSPEC_VCVTPS2PH
80
 
81
  ;; For AVX2 support
82
  UNSPEC_VPERMSI
83
  UNSPEC_VPERMDF
84
  UNSPEC_VPERMSF
85
  UNSPEC_VPERMTI
86
  UNSPEC_GATHER
87
  UNSPEC_VSIBADDR
88
])
89
 
90
(define_c_enum "unspecv" [
91
  UNSPECV_LDMXCSR
92
  UNSPECV_STMXCSR
93
  UNSPECV_CLFLUSH
94
  UNSPECV_MONITOR
95
  UNSPECV_MWAIT
96
  UNSPECV_VZEROALL
97
  UNSPECV_VZEROUPPER
98
])
99
 
100
;; All vector modes including V?TImode, used in move patterns.
101
(define_mode_iterator V16
102
  [(V32QI "TARGET_AVX") V16QI
103
   (V16HI "TARGET_AVX") V8HI
104
   (V8SI "TARGET_AVX") V4SI
105
   (V4DI "TARGET_AVX") V2DI
106
   (V2TI "TARGET_AVX") V1TI
107
   (V8SF "TARGET_AVX") V4SF
108
   (V4DF "TARGET_AVX") V2DF])
109
 
110
;; All vector modes
111
(define_mode_iterator V
112
  [(V32QI "TARGET_AVX") V16QI
113
   (V16HI "TARGET_AVX") V8HI
114
   (V8SI "TARGET_AVX") V4SI
115
   (V4DI "TARGET_AVX") V2DI
116
   (V8SF "TARGET_AVX") V4SF
117
   (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
118
 
119
;; All 128bit vector modes
120
(define_mode_iterator V_128
121
  [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
122
 
123
;; All 256bit vector modes
124
(define_mode_iterator V_256
125
  [V32QI V16HI V8SI V4DI V8SF V4DF])
126
 
127
;; All vector float modes
128
(define_mode_iterator VF
129
  [(V8SF "TARGET_AVX") V4SF
130
   (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
131
 
132
;; All SFmode vector float modes
133
(define_mode_iterator VF1
134
  [(V8SF "TARGET_AVX") V4SF])
135
 
136
;; All DFmode vector float modes
137
(define_mode_iterator VF2
138
  [(V4DF "TARGET_AVX") V2DF])
139
 
140
;; All 128bit vector float modes
141
(define_mode_iterator VF_128
142
  [V4SF (V2DF "TARGET_SSE2")])
143
 
144
;; All 256bit vector float modes
145
(define_mode_iterator VF_256
146
  [V8SF V4DF])
147
 
148
;; All vector integer modes
149
(define_mode_iterator VI
150
  [(V32QI "TARGET_AVX") V16QI
151
   (V16HI "TARGET_AVX") V8HI
152
   (V8SI "TARGET_AVX") V4SI
153
   (V4DI "TARGET_AVX") V2DI])
154
 
155
;; All vector integer modes; the 256-bit modes are enabled only under
;; TARGET_AVX2.
(define_mode_iterator VI_AVX2
156
  [(V32QI "TARGET_AVX2") V16QI
157
   (V16HI "TARGET_AVX2") V8HI
158
   (V8SI "TARGET_AVX2") V4SI
159
   (V4DI "TARGET_AVX2") V2DI])
160
 
161
;; All QImode vector integer modes
162
(define_mode_iterator VI1
163
  [(V32QI "TARGET_AVX") V16QI])
164
 
165
;; All DImode vector integer modes
166
(define_mode_iterator VI8
167
  [(V4DI "TARGET_AVX") V2DI])
168
 
169
;; QImode vector integer modes; the 256-bit mode requires TARGET_AVX2.
(define_mode_iterator VI1_AVX2
170
  [(V32QI "TARGET_AVX2") V16QI])
171
 
172
;; HImode vector integer modes; the 256-bit mode requires TARGET_AVX2.
(define_mode_iterator VI2_AVX2
173
  [(V16HI "TARGET_AVX2") V8HI])
174
 
175
;; SImode vector integer modes; the 256-bit mode requires TARGET_AVX2.
(define_mode_iterator VI4_AVX2
176
  [(V8SI "TARGET_AVX2") V4SI])
177
 
178
;; DImode vector integer modes; the 256-bit mode requires TARGET_AVX2.
(define_mode_iterator VI8_AVX2
179
  [(V4DI "TARGET_AVX2") V2DI])
180
 
181
;; ??? We should probably use TImode instead.
182
(define_mode_iterator VIMAX_AVX2
183
  [(V2TI "TARGET_AVX2") V1TI])
184
 
185
;; ??? This should probably be dropped in favor of VIMAX_AVX2.
186
(define_mode_iterator SSESCALARMODE
187
  [(V2TI "TARGET_AVX2") TI])
188
 
189
(define_mode_iterator VI12_AVX2
190
  [(V32QI "TARGET_AVX2") V16QI
191
   (V16HI "TARGET_AVX2") V8HI])
192
 
193
(define_mode_iterator VI24_AVX2
194
  [(V16HI "TARGET_AVX2") V8HI
195
   (V8SI "TARGET_AVX2") V4SI])
196
 
197
(define_mode_iterator VI124_AVX2
198
  [(V32QI "TARGET_AVX2") V16QI
199
   (V16HI "TARGET_AVX2") V8HI
200
   (V8SI "TARGET_AVX2") V4SI])
201
 
202
(define_mode_iterator VI248_AVX2
203
  [(V16HI "TARGET_AVX2") V8HI
204
   (V8SI "TARGET_AVX2") V4SI
205
   (V4DI "TARGET_AVX2") V2DI])
206
 
207
(define_mode_iterator VI48_AVX2
208
  [(V8SI "TARGET_AVX2") V4SI
209
   (V4DI "TARGET_AVX2") V2DI])
210
 
211
(define_mode_iterator V48_AVX2
212
  [V4SF V2DF
213
   V8SF V4DF
214
   (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
215
   (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
216
 
217
(define_mode_attr sse2_avx2
218
  [(V16QI "sse2") (V32QI "avx2")
219
   (V8HI "sse2") (V16HI "avx2")
220
   (V4SI "sse2") (V8SI "avx2")
221
   (V2DI "sse2") (V4DI "avx2")
222
   (V1TI "sse2") (V2TI "avx2")])
223
 
224
(define_mode_attr ssse3_avx2
225
   [(V16QI "ssse3") (V32QI "avx2")
226
    (V8HI "ssse3") (V16HI "avx2")
227
    (V4SI "ssse3") (V8SI "avx2")
228
    (V2DI "ssse3") (V4DI "avx2")
229
    (TI "ssse3") (V2TI "avx2")])
230
 
231
(define_mode_attr sse4_1_avx2
232
   [(V16QI "sse4_1") (V32QI "avx2")
233
    (V8HI "sse4_1") (V16HI "avx2")
234
    (V4SI "sse4_1") (V8SI "avx2")
235
    (V2DI "sse4_1") (V4DI "avx2")])
236
 
237
(define_mode_attr avx_avx2
238
  [(V4SF "avx") (V2DF "avx")
239
   (V8SF "avx") (V4DF "avx")
240
   (V4SI "avx2") (V2DI "avx2")
241
   (V8SI "avx2") (V4DI "avx2")])
242
 
243
(define_mode_attr vec_avx2
244
  [(V16QI "vec") (V32QI "avx2")
245
   (V8HI "vec") (V16HI "avx2")
246
   (V4SI "vec") (V8SI "avx2")
247
   (V2DI "vec") (V4DI "avx2")])
248
 
249
(define_mode_attr ssedoublemode
250
  [(V16HI "V16SI") (V8HI "V8SI")])
251
 
252
(define_mode_attr ssebytemode
253
  [(V4DI "V32QI") (V2DI "V16QI")])
254
 
255
;; All 128bit vector integer modes
256
(define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
257
 
258
;; All 256bit vector integer modes
259
(define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
260
 
261
;; Random 128bit vector integer mode combinations
262
(define_mode_iterator VI12_128 [V16QI V8HI])
263
(define_mode_iterator VI14_128 [V16QI V4SI])
264
(define_mode_iterator VI124_128 [V16QI V8HI V4SI])
265
(define_mode_iterator VI128_128 [V16QI V8HI V2DI])
266
(define_mode_iterator VI24_128 [V8HI V4SI])
267
(define_mode_iterator VI248_128 [V8HI V4SI V2DI])
268
(define_mode_iterator VI48_128 [V4SI V2DI])
269
 
270
;; Random 256bit vector integer mode combinations
271
(define_mode_iterator VI124_256 [V32QI V16HI V8SI])
272
(define_mode_iterator VI48_256 [V8SI V4DI])
273
 
274
;; Int-float size matches
275
(define_mode_iterator VI4F_128 [V4SI V4SF])
276
(define_mode_iterator VI8F_128 [V2DI V2DF])
277
(define_mode_iterator VI4F_256 [V8SI V8SF])
278
(define_mode_iterator VI8F_256 [V4DI V4DF])
279
 
280
;; Mapping from float mode to required SSE level
281
(define_mode_attr sse
282
  [(SF "sse") (DF "sse2")
283
   (V4SF "sse") (V2DF "sse2")
284
   (V8SF "avx") (V4DF "avx")])
285
 
286
(define_mode_attr sse2
287
  [(V16QI "sse2") (V32QI "avx")
288
   (V2DI "sse2") (V4DI "avx")])
289
 
290
(define_mode_attr sse3
291
  [(V16QI "sse3") (V32QI "avx")])
292
 
293
(define_mode_attr sse4_1
294
  [(V4SF "sse4_1") (V2DF "sse4_1")
295
   (V8SF "avx") (V4DF "avx")])
296
 
297
(define_mode_attr avxsizesuffix
298
  [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
299
   (V16QI "") (V8HI "") (V4SI "") (V2DI "")
300
   (V8SF "256") (V4DF "256")
301
   (V4SF "") (V2DF "")])
302
 
303
;; SSE instruction mode
304
(define_mode_attr sseinsnmode
305
  [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
306
   (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
307
   (V8SF "V8SF") (V4DF "V4DF")
308
   (V4SF "V4SF") (V2DF "V2DF")
309
   (TI "TI")])
310
 
311
;; Mapping of vector float modes to an integer mode of the same size
312
(define_mode_attr sseintvecmode
313
  [(V8SF "V8SI") (V4DF "V4DI")
314
   (V4SF "V4SI") (V2DF "V2DI")
315
   (V8SI "V8SI") (V4DI "V4DI")
316
   (V4SI "V4SI") (V2DI "V2DI")
317
   (V16HI "V16HI") (V8HI "V8HI")
318
   (V32QI "V32QI") (V16QI "V16QI")])
319
 
320
(define_mode_attr sseintvecmodelower
321
  [(V8SF "v8si") (V4DF "v4di")
322
   (V4SF "v4si") (V2DF "v2di")
323
   (V8SI "v8si") (V4DI "v4di")
324
   (V4SI "v4si") (V2DI "v2di")
325
   (V16HI "v16hi") (V8HI "v8hi")
326
   (V32QI "v32qi") (V16QI "v16qi")])
327
 
328
;; Mapping of vector modes to a vector mode of double size
329
(define_mode_attr ssedoublevecmode
330
  [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
331
   (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
332
   (V8SF "V16SF") (V4DF "V8DF")
333
   (V4SF "V8SF") (V2DF "V4DF")])
334
 
335
;; Mapping of vector modes to a vector mode of half size
336
(define_mode_attr ssehalfvecmode
337
  [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
338
   (V16QI  "V8QI") (V8HI  "V4HI") (V4SI "V2SI")
339
   (V8SF "V4SF") (V4DF "V2DF")
340
   (V4SF "V2SF")])
341
 
342
;; Mapping of vector modes back to the scalar modes
343
(define_mode_attr ssescalarmode
344
  [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
345
   (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
346
   (V8SF "SF") (V4DF "DF")
347
   (V4SF "SF") (V2DF "DF")])
348
 
349
;; Number of scalar elements in each vector type
350
(define_mode_attr ssescalarnum
351
  [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
352
   (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
353
   (V8SF "8") (V4DF "4")
354
   (V4SF "4") (V2DF "2")])
355
 
356
;; SSE prefix for integer vector modes
357
(define_mode_attr sseintprefix
358
  [(V2DI "p") (V2DF "")
359
   (V4DI "p") (V4DF "")
360
   (V4SI "p") (V4SF "")
361
   (V8SI "p") (V8SF "")])
362
 
363
;; SSE scalar suffix for vector modes
364
(define_mode_attr ssescalarmodesuffix
365
  [(SF "ss") (DF "sd")
366
   (V8SF "ss") (V4DF "sd")
367
   (V4SF "ss") (V2DF "sd")
368
   (V8SI "ss") (V4DI "sd")
369
   (V4SI "d")])
370
 
371
;; Pack/unpack vector modes
372
(define_mode_attr sseunpackmode
373
  [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
374
   (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")])
375
 
376
(define_mode_attr ssepackmode
377
  [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
378
   (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])
379
 
380
;; Mapping of the max integer size for xop rotate immediate constraint
381
(define_mode_attr sserotatemax
382
  [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
383
 
384
;; Mapping of mode to cast intrinsic name
385
(define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
386
 
387
;; Instruction suffix for sign and zero extensions.
388
(define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
389
 
390
;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
391
(define_mode_attr i128
392
  [(V8SF "f128") (V4DF "f128") (V32QI "%~128") (V16HI "%~128")
393
   (V8SI "%~128") (V4DI "%~128")])
394
 
395
;; Mix-n-match
396
(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
397
 
398
(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
399
 
400
;; Mapping of immediate bits for blend instructions
401
(define_mode_attr blendbits
402
  [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
403
 
404
;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
405
 
406
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
407
;;
408
;; Move patterns
409
;;
410
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
411
 
412
;; All of these patterns are enabled for SSE1 as well as SSE2.
413
;; This is essential for maintaining stable calling conventions.
414
 
415
;; Vector move expander, instantiated once for every mode in V16.
;; The pattern name and the mode passed to the helper are produced by
;; iterator substitution; all of the work is delegated to
;; ix86_expand_vector_move.
(define_expand "mov<mode>"
  [(set (match_operand:V16 0 "nonimmediate_operand" "")
        (match_operand:V16 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move (<MODE>mode, operands);
  DONE;
})
423
 
424
;; Vector move insn for every mode in V16.  At least one operand must be
;; a register.  Alternative 0 materializes an SSE constant, alternative 1
;; is a register/memory load and alternative 2 is a store.  For the
;; load/store alternatives the mnemonic is chosen from the insn's "mode"
;; attribute; under AVX an unaligned form is used when either operand is
;; misaligned.
(define_insn "*mov<mode>_internal"
  [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
        (match_operand:V16 1 "nonimmediate_or_sse_const_operand"  "C ,xm,x"))]
  "TARGET_SSE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
      return standard_sse_constant_opcode (insn, operands[1]);
    case 1:
    case 2:
      switch (get_attr_mode (insn))
        {
        case MODE_V8SF:
        case MODE_V4SF:
          if (TARGET_AVX
              && (misaligned_operand (operands[0], <MODE>mode)
                  || misaligned_operand (operands[1], <MODE>mode)))
            return "vmovups\t{%1, %0|%0, %1}";
          else
            return "%vmovaps\t{%1, %0|%0, %1}";

        case MODE_V4DF:
        case MODE_V2DF:
          if (TARGET_AVX
              && (misaligned_operand (operands[0], <MODE>mode)
                  || misaligned_operand (operands[1], <MODE>mode)))
            return "vmovupd\t{%1, %0|%0, %1}";
          else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
            return "%vmovaps\t{%1, %0|%0, %1}";
          else
            return "%vmovapd\t{%1, %0|%0, %1}";

        case MODE_OI:
        case MODE_TI:
          if (TARGET_AVX
              && (misaligned_operand (operands[0], <MODE>mode)
                  || misaligned_operand (operands[1], <MODE>mode)))
            return "vmovdqu\t{%1, %0|%0, %1}";
          else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
            return "%vmovaps\t{%1, %0|%0, %1}";
          else
            return "%vmovdqa\t{%1, %0|%0, %1}";

        default:
          gcc_unreachable ();
        }
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,ssemov,ssemov")
   (set_attr "prefix" "maybe_vex")
   ;; Under AVX the insn mode comes from the sseinsnmode attribute.
   ;; Otherwise V4SF is used when optimizing for size, when SSE2 is not
   ;; available, or for stores on TARGET_SSE_TYPELESS_STORES targets;
   ;; V4SF/V2DF operands keep their own mode and everything else is TI.
   (set (attr "mode")
        (cond [(match_test "TARGET_AVX")
                 (const_string "<sseinsnmode>")
               (ior (ior (match_test "optimize_function_for_size_p (cfun)")
                         (not (match_test "TARGET_SSE2")))
                    (and (eq_attr "alternative" "2")
                         (match_test "TARGET_SSE_TYPELESS_STORES")))
                 (const_string "V4SF")
               (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
                 (const_string "V4SF")
               (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
                 (const_string "V2DF")
              ]
          (const_string "TI")))])
493
 
494
;; Build a V2DI register from element 0 (DImode) of operand 1 concatenated
;; with a zero, emitted as a single (v)movq.
(define_insn "sse2_movq128"
495
  [(set (match_operand:V2DI 0 "register_operand" "=x")
496
        (vec_concat:V2DI
497
          (vec_select:DI
498
            (match_operand:V2DI 1 "nonimmediate_operand" "xm")
499
            (parallel [(const_int 0)]))
500
          (const_int 0)))]
501
  "TARGET_SSE2"
502
  "%vmovq\t{%1, %0|%0, %1}"
503
  [(set_attr "type" "ssemov")
504
   (set_attr "prefix" "maybe_vex")
505
   (set_attr "mode" "TI")])
506
 
507
;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
508
;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
509
;; from memory, we'd prefer to load the memory directly into the %xmm
510
;; register.  To facilitate this happy circumstance, this pattern won't
511
;; split until after register allocation.  If the 64-bit value didn't
512
;; come from memory, this is the best we can do.  This is much better
513
;; than storing %edx:%eax into a stack temporary and loading an %xmm
514
;; from there.
515
 
516
;; Operand 1 is either a general register (alternative 0) or memory
;; (alternative 1).  The V4SI scratch (operand 2) is only allocated for
;; the register alternative, where it holds the upper 32 bits before the
;; interleave.  The insn is always split ("#") once reload has completed.
(define_insn_and_split "movdi_to_sse"
517
  [(parallel
518
    [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
519
          (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
520
     (clobber (match_scratch:V4SI 2 "=&x,X"))])]
521
  "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
522
  "#"
523
  "&& reload_completed"
524
  [(const_int 0)]
525
{
526
 if (register_operand (operands[1], DImode))
527
   {
528
      /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
529
         Assemble the 64-bit DImode value in an xmm register.  */
530
      emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
531
                                  gen_rtx_SUBREG (SImode, operands[1], 0)));
532
      emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
533
                                  gen_rtx_SUBREG (SImode, operands[1], 4)));
534
      emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
535
                                             operands[2]));
536
    }
537
 /* Memory source: load the DImode value directly, concatenated with
    zero, viewing the destination as V2DI.  */
 else if (memory_operand (operands[1], DImode))
538
   emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
539
                                  operands[1], const0_rtx));
540
 else
541
   gcc_unreachable ();
542
})
543
 
544
;; After reload, rewrite a V4SF register set whose source satisfies
;; zero_extended_scalar_load_operand as a vec_merge (mask 1) of the
;; duplicated SFmode scalar with an all-zero V4SF vector.  Operand 1 is
;; narrowed to its SFmode subreg at offset 0 before the new pattern is
;; generated.
(define_split
545
  [(set (match_operand:V4SF 0 "register_operand" "")
546
        (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
547
  "TARGET_SSE && reload_completed"
548
  [(set (match_dup 0)
549
        (vec_merge:V4SF
550
          (vec_duplicate:V4SF (match_dup 1))
551
          (match_dup 2)
552
          (const_int 1)))]
553
{
554
  operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
555
  operands[2] = CONST0_RTX (V4SFmode);
556
})
557
 
558
;; V2DF counterpart of the split above: after reload, rewrite the set as
;; a vec_concat of the DFmode subreg (offset 0) of operand 1 with a DFmode
;; zero.
(define_split
559
  [(set (match_operand:V2DF 0 "register_operand" "")
560
        (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
561
  "TARGET_SSE2 && reload_completed"
562
  [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
563
{
564
  operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
565
  operands[2] = CONST0_RTX (DFmode);
566
})
567
 
568
;; Push expander for every mode in V16; delegates to ix86_expand_push
;; with the iterator's mode.
(define_expand "push<mode>1"
  [(match_operand:V16 0 "register_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_push (<MODE>mode, operands[0]);
  DONE;
})
575
 
576
;; Misaligned vector move expander for every mode in V16; delegates to
;; ix86_expand_vector_move_misalign with the iterator's mode.
(define_expand "movmisalign<mode>"
  [(set (match_operand:V16 0 "nonimmediate_operand" "")
        (match_operand:V16 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move_misalign (<MODE>mode, operands);
  DONE;
})
584
 
585
;; Unaligned float vector move (UNSPEC_MOVU) expander for the VF modes.
;; A memory-to-memory move is not allowed, so the source is forced into
;; a register when both operands are memory.
;; NOTE(review): the ssemodesuffix attribute is not defined in the visible
;; part of this file; it is expected to come from the including
;; description -- confirm.
(define_expand "<sse>_movu<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF 0 "nonimmediate_operand" "")
        (unspec:VF
          [(match_operand:VF 1 "nonimmediate_operand" "")]
          UNSPEC_MOVU))]
  "TARGET_SSE"
{
  if (MEM_P (operands[0]) && MEM_P (operands[1]))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})
595
 
596
;; Unaligned float vector move insn matching the expander above: either a
;; load/register copy (alternative 0) or a store (alternative 1), never
;; memory to memory.  The mnemonic suffix is selected per mode.
;; NOTE(review): ssemodesuffix is not defined in the visible part of this
;; file -- confirm it is provided by the including description.
(define_insn "*<sse>_movu<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
        (unspec:VF
          [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
          UNSPEC_MOVU))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
607
 
608
;; Unaligned integer vector move (UNSPEC_MOVU) expander for the QImode
;; vector modes in VI1.  The source is forced into a register when both
;; operands are memory.
(define_expand "<sse2>_movdqu<avxsizesuffix>"
  [(set (match_operand:VI1 0 "nonimmediate_operand" "")
        (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "")]
                    UNSPEC_MOVU))]
  "TARGET_SSE2"
{
  if (MEM_P (operands[0]) && MEM_P (operands[1]))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})
617
 
618
;; Unaligned integer vector move insn ((v)movdqu) for the VI1 modes;
;; memory-to-memory is rejected by the insn condition.  prefix_data16 is
;; "1" for the non-AVX encoding and left at its default ("*") under AVX.
(define_insn "*<sse2>_movdqu<avxsizesuffix>"
  [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
        (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
                    UNSPEC_MOVU))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "%vmovdqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])
633
 
634
;; (v)lddqu: load of a VI1-mode vector from memory, modelled as
;; UNSPEC_LDDQU.  For the non-AVX encoding prefix_data16 is "0" and
;; prefix_rep is "1"; under AVX both are left at their defaults ("*").
(define_insn "<sse3>_lddqu<avxsizesuffix>"
  [(set (match_operand:VI1 0 "register_operand" "=x")
        (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
                    UNSPEC_LDDQU))]
  "TARGET_SSE3"
  "%vlddqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "0")))
   (set (attr "prefix_rep")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])
654
 
655
;; movnti: store of a general register to memory, modelled as
;; UNSPEC_MOVNT, instantiated for each mode in SWI48.
;; NOTE(review): SWI48 is not defined in the visible part of this file --
;; confirm it is provided by the including description.
(define_insn "sse2_movnti<mode>"
  [(set (match_operand:SWI48 0 "memory_operand" "=m")
        (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
                      UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movnti\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_data16" "0")
   (set_attr "mode" "<MODE>")])
664
 
665
;; (v)movntps / (v)movntpd: UNSPEC_MOVNT store of a float vector register
;; to memory for the VF modes.
;; NOTE(review): ssemodesuffix is not defined in the visible part of this
;; file -- confirm it is provided by the including description.
(define_insn "<sse>_movnt<mode>"
  [(set (match_operand:VF 0 "memory_operand" "=m")
        (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
                   UNSPEC_MOVNT))]
  "TARGET_SSE"
  "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
674
 
675
;; (v)movntdq: UNSPEC_MOVNT store of a DImode-element integer vector
;; register to memory for the VI8 modes.  prefix_data16 is "1" for the
;; non-AVX encoding and left at its default ("*") under AVX.
(define_insn "<sse2>_movnt<mode>"
  [(set (match_operand:VI8 0 "memory_operand" "=m")
        (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
                    UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "%vmovntdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])
689
 
690
; Expand patterns for non-temporal stores.  At the moment, only those
691
; that directly map to insns are defined; it would be possible to
692
; define patterns for other modes that would expand to several insns.
693
 
694
;; Modes handled by storent patterns.
695
(define_mode_iterator STORENT_MODE
696
  [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
697
   (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
698
   (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
699
   (V8SF "TARGET_AVX") V4SF
700
   (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
701
 
702
;; storent expander for every mode in STORENT_MODE.  It has no
;; preparation code: it only generates the UNSPEC_MOVNT store, which must
;; then be matched by one of the movnt insns.
(define_expand "storent<mode>"
  [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
        (unspec:STORENT_MODE
          [(match_operand:STORENT_MODE 1 "register_operand" "")]
          UNSPEC_MOVNT))]
  "TARGET_SSE")
708
 
709
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
710
;;
711
;; Parallel floating point arithmetic
712
;;
713
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
714
 
715
;; Vector abs/neg expander: one instance per code in the absneg iterator
;; and per mode in VF.  Expansion is delegated to
;; ix86_expand_fp_absneg_operator with the iterator's rtx code and mode.
(define_expand "<code><mode>2"
  [(set (match_operand:VF 0 "register_operand" "")
        (absneg:VF
          (match_operand:VF 1 "register_operand" "")))]
  "TARGET_SSE"
  "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
721
 
722
;; Vector abs/neg insn, always split after reload into a single logical
;; operation: XOR when operator 3 is NEG, AND otherwise.  Operand 2 is
;; carried in a (use) and becomes the second input of that operation
;; (presumably the sign-bit mask -- confirm against
;; ix86_expand_fp_absneg_operator).
(define_insn_and_split "*absneg<mode>2"
  [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
        (match_operator:VF 3 "absneg_operator"
          [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
   (use (match_operand:VF 2 "nonimmediate_operand"    "xm,0, xm,x"))]
  "TARGET_SSE"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  enum rtx_code absneg_op;
  rtx op1, op2;
  rtx t;

  if (TARGET_AVX)
    {
      /* Three-operand form: keep a memory operand, if any, as the
         second input.  */
      if (MEM_P (operands[1]))
        op1 = operands[2], op2 = operands[1];
      else
        op1 = operands[1], op2 = operands[2];
    }
  else
    {
      /* Two-operand form: the destination is also the first input; the
         second input is whichever source is not tied to it.  */
      op1 = operands[0];
      if (rtx_equal_p (operands[0], operands[1]))
        op2 = operands[2];
      else
        op2 = operands[1];
    }

  absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
  t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
  t = gen_rtx_SET (VOIDmode, operands[0], t);
  emit_insn (t);
  DONE;
}
  [(set_attr "isa" "noavx,noavx,avx,avx")])
759
 
760
;; Vector float add/sub expander: one instance per code in the plusminus
;; iterator and per mode in VF.  Operands are legitimized by
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
;; NOTE(review): plusminus_insn is not defined in the visible part of
;; this file -- confirm it is provided by the including description.
(define_expand "<plusminus_insn><mode>3"
  [(set (match_operand:VF 0 "register_operand" "")
        (plusminus:VF
          (match_operand:VF 1 "nonimmediate_operand" "")
          (match_operand:VF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
767
 
768
;; Vector float add/sub insn for the VF modes.  Alternative 0 is the
;; two-operand non-AVX form (destination tied to operand 1); alternative
;; 1 is the three-operand AVX form.
;; NOTE(review): plusminus_insn, plusminus_mnemonic, ssemodesuffix and
;; comm are not defined in the visible part of this file.  The <comm>
;; prefix on operand 1's constraint is restored from the upstream
;; pattern and is expected to mark the operand commutative for plus
;; only -- confirm.
(define_insn "*<plusminus_insn><mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (plusminus:VF
          (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
          (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "@
   <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
   v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])
781
 
782
;; Scalar ("vm") add/sub on 128-bit float vectors: the vec_merge with
;; mask 1 takes element 0 from the plusminus result and all other
;; elements from operand 1.  The mnemonic uses the scalar suffix
;; (ss/sd) and the insn mode is the scalar mode.
;; NOTE(review): plusminus_insn and plusminus_mnemonic are not defined in
;; the visible part of this file -- confirm.
(define_insn "<sse>_vm<plusminus_insn><mode>3"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (plusminus:VF_128
            (match_operand:VF_128 1 "register_operand" "0,x")
            (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "@
   <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
   v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<ssescalarmode>")])
798
 
799
;; Vector float multiply expander for the VF modes; operands are
;; legitimized by ix86_fixup_binary_operands_no_copy.
(define_expand "mul<mode>3"
  [(set (match_operand:VF 0 "register_operand" "")
        (mult:VF
          (match_operand:VF 1 "nonimmediate_operand" "")
          (match_operand:VF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
806
 
807
;; Vector float multiply insn for the VF modes.  Operand 1 is marked
;; commutative ("%").  Alternative 0 is the two-operand non-AVX form,
;; alternative 1 the three-operand AVX form.
;; NOTE(review): ssemodesuffix is not defined in the visible part of this
;; file -- confirm it is provided by the including description.
(define_insn "*mul<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (mult:VF
          (match_operand:VF 1 "nonimmediate_operand" "%0,x")
          (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
  "@
   mul<ssemodesuffix>\t{%2, %0|%0, %2}
   vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemul")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])
820
 
821
;; Scalar ("vm") multiply on 128-bit float vectors: element 0 comes from
;; the product, all other elements from operand 1 (vec_merge mask 1).
(define_insn "<sse>_vmmul<mode>3"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (mult:VF_128
            (match_operand:VF_128 1 "register_operand" "0,x")
            (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "@
   mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
   vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemul")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<ssescalarmode>")])
837
 
838
;; Vector divide expander for the DFmode vector modes (VF2); operands are
;; legitimized by ix86_fixup_binary_operands_no_copy.
(define_expand "div<mode>3"
  [(set (match_operand:VF2 0 "register_operand" "")
        (div:VF2 (match_operand:VF2 1 "register_operand" "")
                 (match_operand:VF2 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
844
 
845
;; Vector divide expander for the SFmode vector modes (VF1).  When SSE
;; math, vector reciprocal division and the relaxed floating-point flags
;; tested below are all enabled and the insn is not optimized for size,
;; the division is emitted by ix86_emit_swdivsf instead of the div
;; template.
(define_expand "div<mode>3"
  [(set (match_operand:VF1 0 "register_operand" "")
        (div:VF1 (match_operand:VF1 1 "register_operand" "")
                 (match_operand:VF1 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);

  if (TARGET_SSE_MATH
      && TARGET_RECIP_VEC_DIV
      && !optimize_insn_for_size_p ()
      && flag_finite_math_only && !flag_trapping_math
      && flag_unsafe_math_optimizations)
    {
      ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
      DONE;
    }
})
863
 
864
;; Vector float divide insn for the VF modes.  Alternative 0 is the
;; two-operand non-AVX form, alternative 1 the three-operand AVX form.
;; NOTE(review): ssemodesuffix is not defined in the visible part of this
;; file -- confirm it is provided by the including description.
(define_insn "<sse>_div<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (div:VF
          (match_operand:VF 1 "register_operand" "0,x")
          (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE"
  "@
   div<ssemodesuffix>\t{%2, %0|%0, %2}
   vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssediv")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])
877
 
878
;; Scalar ("vm") divide on 128-bit float vectors: element 0 comes from
;; the quotient, all other elements from operand 1 (vec_merge mask 1).
(define_insn "<sse>_vmdiv<mode>3"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (div:VF_128
            (match_operand:VF_128 1 "register_operand" "0,x")
            (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "@
   div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
   vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssediv")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<ssescalarmode>")])
894
 
895
;; (v)rcpps: packed reciprocal (UNSPEC_RCP) for the SFmode vector modes
;; in VF1.
(define_insn "<sse>_rcp<mode>2"
  [(set (match_operand:VF1 0 "register_operand" "=x")
        (unspec:VF1
          [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
  "TARGET_SSE"
  "%vrcpps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "rcp")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
905
 
906
(define_insn "sse_vmrcpv4sf2"
907
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
908
        (vec_merge:V4SF
909
          (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
910
                       UNSPEC_RCP)
911
          (match_operand:V4SF 2 "register_operand" "0,x")
912
          (const_int 1)))]
913
  "TARGET_SSE"
914
  "@
915
   rcpss\t{%1, %0|%0, %1}
916
   vrcpss\t{%1, %2, %0|%0, %2, %1}"
917
  [(set_attr "isa" "noavx,avx")
918
   (set_attr "type" "sse")
919
   (set_attr "atom_sse_attr" "rcp")
920
   (set_attr "prefix" "orig,vex")
921
   (set_attr "mode" "SF")])
922
 
923
;; Vector square-root expander for the DFmode vector modes (VF2); no
;; preparation code, the template is emitted as is.
(define_expand "sqrt<mode>2"
  [(set (match_operand:VF2 0 "register_operand" "")
        (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]
  "TARGET_SSE2")
927
 
928
;; Vector square-root expander for the SFmode vector modes (VF1).  When
;; SSE math, vector reciprocal sqrt and the relaxed floating-point flags
;; tested below are all enabled and the insn is not optimized for size,
;; the result is emitted by ix86_emit_swsqrtsf (last argument false)
;; instead of the sqrt template.
(define_expand "sqrt<mode>2"
  [(set (match_operand:VF1 0 "register_operand" "")
        (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  if (TARGET_SSE_MATH
      && TARGET_RECIP_VEC_SQRT
      && !optimize_insn_for_size_p ()
      && flag_finite_math_only && !flag_trapping_math
      && flag_unsafe_math_optimizations)
    {
      ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
      DONE;
    }
})
943
 
944
;; Full-vector square root over the VF modes; operand 1 may be a register
;; or memory.
;; NOTE(review): "<sse>", "<mode>", "<ssemodesuffix>" and "<MODE>" restored;
;; verify against upstream.
(define_insn "<sse>_sqrt<mode>2"
  [(set (match_operand:VF 0 "register_operand" "=x")
        (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "sqrt")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
953
 
954
;; Scalar square root on a 128-bit vector: element 0 is sqrt of operand 1,
;; the remaining elements come from operand 2 (vec_merge with mask 1).
;; Alternative 0 ties operand 2 to the destination (SSE encoding);
;; alternative 1 is the three-operand AVX encoding.
;; NOTE(review): "<sse>", "<mode>", "<ssescalarmodesuffix>" and
;; "<ssescalarmode>" restored; verify against upstream.
(define_insn "<sse>_vmsqrt<mode>2"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (sqrt:VF_128
            (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
          (match_operand:VF_128 2 "register_operand" "0,x")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
   vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sse")
   (set_attr "atom_sse_attr" "sqrt")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<ssescalarmode>")])
970
 
971
;; Standard-name reciprocal square root for the VF1 modes.  The RTL
;; template is never emitted directly: the expansion is always delegated
;; to ix86_emit_swsqrtsf with its last argument set to true.
;; NOTE(review): "<mode>" / "<MODE>mode" restored; verify against upstream.
(define_expand "rsqrt<mode>2"
  [(set (match_operand:VF1 0 "register_operand" "")
        (unspec:VF1
          [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
  "TARGET_SSE_MATH"
{
  ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
  DONE;
})
980
 
981
;; Vector reciprocal square root (UNSPEC_RSQRT) over the VF1 modes,
;; emitted as rsqrtps.  Operand 1 may be a register or memory.
;; NOTE(review): "<sse>", "<mode>" and "<MODE>" restored; verify against
;; upstream.
(define_insn "<sse>_rsqrt<mode>2"
  [(set (match_operand:VF1 0 "register_operand" "=x")
        (unspec:VF1
          [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
  "TARGET_SSE"
  "%vrsqrtps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
990
 
991
;; Scalar reciprocal square root on V4SF: element 0 of the result is
;; UNSPEC_RSQRT of operand 1; the other elements are taken from operand 2
;; (vec_merge with mask 1).  Alternative 0 is the two-operand SSE form,
;; alternative 1 the three-operand AVX form.
(define_insn "sse_vmrsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
                       UNSPEC_RSQRT)
          (match_operand:V4SF 2 "register_operand" "0,x")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   rsqrtss\t{%1, %0|%0, %1}
   vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sse")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "SF")])
1006
 
1007
;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
;; isn't really correct, as those rtl operators aren't defined when
;; applied to NaNs.  Hopefully the optimizers won't get too smart on us.
1010
 
1011
;; Standard-name signed min/max expander for the VF modes.  Without
;; -ffinite-math-only operand 1 is forced into a register first (the
;; non-finite insn below only accepts a register there); the operands are
;; then legitimized with ix86_fixup_binary_operands_no_copy.
;; NOTE(review): "<code>", "<mode>", "<CODE>" and "<MODE>mode" restored;
;; verify against upstream.
(define_expand "<code><mode>3"
  [(set (match_operand:VF 0 "register_operand" "")
        (smaxmin:VF
          (match_operand:VF 1 "nonimmediate_operand" "")
          (match_operand:VF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  if (!flag_finite_math_only)
    operands[1] = force_reg (<MODE>mode, operands[1]);
  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
})
1022
 
1023
;; Min/max insn used under -ffinite-math-only.  Operands 1 and 2 are
;; marked commutative ("%"), and the insn condition additionally requires
;; ix86_binary_operator_ok.
;; NOTE(review): "<code>", "<mode>", "<CODE>", "<MODE>",
;; "<maxmin_float>" and "<ssemodesuffix>" restored; verify against upstream.
(define_insn "*<code><mode>3_finite"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (smaxmin:VF
          (match_operand:VF 1 "nonimmediate_operand" "%0,x")
          (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE && flag_finite_math_only
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "@
   <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
   v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])
1037
 
1038
;; Min/max insn used when -ffinite-math-only is NOT in effect.  Unlike
;; the _finite variant, operand 1 must be a register and the operands are
;; not marked commutative.
;; NOTE(review): "<code>", "<mode>", "<MODE>", "<maxmin_float>" and
;; "<ssemodesuffix>" restored; verify against upstream.
(define_insn "*<code><mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (smaxmin:VF
          (match_operand:VF 1 "register_operand" "0,x")
          (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE && !flag_finite_math_only"
  "@
   <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
   v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])
1051
 
1052
;; Scalar min/max on a 128-bit vector: element 0 is the min/max of
;; operands 1 and 2, the remaining elements are copied from operand 1
;; (vec_merge with mask 1).
;; NOTE(review): "<sse>", "<code>", "<mode>", "<maxmin_float>",
;; "<ssescalarmodesuffix>" and "<ssescalarmode>" restored; verify against
;; upstream.
(define_insn "<sse>_vm<code><mode>3"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (smaxmin:VF_128
            (match_operand:VF_128 1 "register_operand" "0,x")
            (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
         (match_dup 1)
         (const_int 1)))]
  "TARGET_SSE"
  "@
   <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
   v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sse")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<ssescalarmode>")])
1068
 
1069
;; These versions of the min/max patterns implement exactly the operations
;;   min = (op1 < op2 ? op1 : op2)
;;   max = (!(op1 < op2) ? op1 : op2)
;; Their operands are not commutative, and thus they may be used in the
;; presence of -0.0 and NaN.
1074
 
1075
;; Non-commutative minimum expressed as UNSPEC_IEEE_MIN so that the
;; operand order is preserved.  Operand 1 must be a register; operand 2
;; may be a register or memory.
;; NOTE(review): "<mode>", "<ssemodesuffix>" and "<MODE>" restored; verify
;; against upstream.
(define_insn "*ieee_smin<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (unspec:VF
          [(match_operand:VF 1 "register_operand" "0,x")
           (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
         UNSPEC_IEEE_MIN))]
  "TARGET_SSE"
  "@
   min<ssemodesuffix>\t{%2, %0|%0, %2}
   vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])
1089
 
1090
;; Non-commutative maximum expressed as UNSPEC_IEEE_MAX so that the
;; operand order is preserved.  Operand 1 must be a register; operand 2
;; may be a register or memory.
;; NOTE(review): "<mode>", "<ssemodesuffix>" and "<MODE>" restored; verify
;; against upstream.
(define_insn "*ieee_smax<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (unspec:VF
          [(match_operand:VF 1 "register_operand" "0,x")
           (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
         UNSPEC_IEEE_MAX))]
  "TARGET_SSE"
  "@
   max<ssemodesuffix>\t{%2, %0|%0, %2}
   vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])
1104
 
1105
;; V4DF add/subtract: vec_merge of (op1 + op2) and (op1 - op2) with mask
;; 10 (binary 1010), i.e. elements 1 and 3 come from the sum and elements
;; 0 and 2 from the difference.  AVX only, three-operand form.
(define_insn "avx_addsubv4df3"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (vec_merge:V4DF
          (plus:V4DF
            (match_operand:V4DF 1 "register_operand" "x")
            (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
          (minus:V4DF (match_dup 1) (match_dup 2))
          (const_int 10)))]
  "TARGET_AVX"
  "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])
1118
 
1119
;; V2DF add/subtract: vec_merge of (op1 + op2) and (op1 - op2) with mask
;; 2 (binary 10), i.e. element 1 comes from the sum and element 0 from
;; the difference.  Alternative 0 is the SSE3 form, alternative 1 the AVX
;; form.
(define_insn "sse3_addsubv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (vec_merge:V2DF
          (plus:V2DF
            (match_operand:V2DF 1 "register_operand" "0,x")
            (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
          (minus:V2DF (match_dup 1) (match_dup 2))
          (const_int 2)))]
  "TARGET_SSE3"
  "@
   addsubpd\t{%2, %0|%0, %2}
   vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V2DF")])
1136
 
1137
;; V8SF add/subtract: vec_merge of (op1 + op2) and (op1 - op2) with mask
;; 170 (binary 10101010), i.e. odd elements come from the sum and even
;; elements from the difference.  AVX only, three-operand form.
(define_insn "avx_addsubv8sf3"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_merge:V8SF
          (plus:V8SF
            (match_operand:V8SF 1 "register_operand" "x")
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
          (minus:V8SF (match_dup 1) (match_dup 2))
          (const_int 170)))]
  "TARGET_AVX"
  "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
1150
 
1151
;; V4SF add/subtract: vec_merge of (op1 + op2) and (op1 - op2) with mask
;; 10 (binary 1010), i.e. elements 1 and 3 come from the sum and elements
;; 0 and 2 from the difference.  Alternative 0 is the SSE3 form,
;; alternative 1 the AVX form.
(define_insn "sse3_addsubv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (plus:V4SF
            (match_operand:V4SF 1 "register_operand" "0,x")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
          (minus:V4SF (match_dup 1) (match_dup 2))
          (const_int 10)))]
  "TARGET_SSE3"
  "@
   addsubps\t{%2, %0|%0, %2}
   vaddsubps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_rep" "1,*")
   (set_attr "mode" "V4SF")])
1168
 
1169
;; V4DF horizontal add/subtract.  Result layout, as written in the RTL:
;;   { op1[0] OP op1[1], op1[2] OP op1[3],
;;     op2[0] OP op2[1], op2[2] OP op2[3] }
;; NOTE(review): this RTL element order is reproduced from the source as
;; is; "<plusminus_insn>" and "<plusminus_mnemonic>" restored; verify both
;; against upstream.
(define_insn "avx_h<plusminus_insn>v4df3"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (vec_concat:V4DF
          (vec_concat:V2DF
            (plusminus:DF
              (vec_select:DF
                (match_operand:V4DF 1 "register_operand" "x")
                (parallel [(const_int 0)]))
              (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
            (plusminus:DF
              (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
          (vec_concat:V2DF
            (plusminus:DF
              (vec_select:DF
                (match_operand:V4DF 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)]))
              (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
            (plusminus:DF
              (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_AVX"
  "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])
1195
 
1196
;; V2DF horizontal add/subtract:
;;   { op1[0] OP op1[1], op2[0] OP op2[1] }
;; Alternative 0 is the SSE3 form, alternative 1 the AVX form.
;; NOTE(review): "<plusminus_insn>" and "<plusminus_mnemonic>" restored;
;; verify against upstream.
(define_insn "sse3_h<plusminus_insn>v2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (vec_concat:V2DF
          (plusminus:DF
            (vec_select:DF
              (match_operand:V2DF 1 "register_operand" "0,x")
              (parallel [(const_int 0)]))
            (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
          (plusminus:DF
            (vec_select:DF
              (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
              (parallel [(const_int 0)]))
            (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSE3"
  "@
   h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
   vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V2DF")])
1217
 
1218
;; V8SF horizontal add/subtract.  Result layout, as written in the RTL:
;;   low  V4SF: { op1[0] OP op1[1], op1[2] OP op1[3],
;;                op2[0] OP op2[1], op2[2] OP op2[3] }
;;   high V4SF: { op1[4] OP op1[5], op1[6] OP op1[7],
;;                op2[4] OP op2[5], op2[6] OP op2[7] }
;; NOTE(review): "<plusminus_insn>" and "<plusminus_mnemonic>" restored;
;; verify against upstream.
(define_insn "avx_h<plusminus_insn>v8sf3"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_concat:V8SF
          (vec_concat:V4SF
            (vec_concat:V2SF
              (plusminus:SF
                (vec_select:SF
                  (match_operand:V8SF 1 "register_operand" "x")
                  (parallel [(const_int 0)]))
                (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
              (plusminus:SF
                (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2SF
              (plusminus:SF
                (vec_select:SF
                  (match_operand:V8SF 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
              (plusminus:SF
                (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
          (vec_concat:V4SF
            (vec_concat:V2SF
              (plusminus:SF
                (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
              (plusminus:SF
                (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
            (vec_concat:V2SF
              (plusminus:SF
                (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
              (plusminus:SF
                (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_AVX"
  "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
1260
 
1261
;; V4SF horizontal add/subtract:
;;   { op1[0] OP op1[1], op1[2] OP op1[3],
;;     op2[0] OP op2[1], op2[2] OP op2[3] }
;; Alternative 0 is the SSE3 form, alternative 1 the AVX form.
;; NOTE(review): "<plusminus_insn>" and "<plusminus_mnemonic>" restored;
;; verify against upstream.
(define_insn "sse3_h<plusminus_insn>v4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_concat:V4SF
          (vec_concat:V2SF
            (plusminus:SF
              (vec_select:SF
                (match_operand:V4SF 1 "register_operand" "0,x")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
            (plusminus:SF
              (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
          (vec_concat:V2SF
            (plusminus:SF
              (vec_select:SF
                (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
            (plusminus:SF
              (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSE3"
  "@
   h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
   vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_rep" "1,*")
   (set_attr "mode" "V4SF")])
1292
 
1293
;; Sum reduction of a V4DF vector (AVX).  Emits three insns:
;;   tmp  = hadd (op1, op1)
;;   tmp2 = vperm2f128 (tmp, tmp, 1)   ; presumably swaps the 128-bit halves
;;   op0  = tmp + tmp2
(define_expand "reduc_splus_v4df"
  [(match_operand:V4DF 0 "register_operand" "")
   (match_operand:V4DF 1 "register_operand" "")]
  "TARGET_AVX"
{
  rtx tmp = gen_reg_rtx (V4DFmode);
  rtx tmp2 = gen_reg_rtx (V4DFmode);
  emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
  emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
  emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
  DONE;
})
1305
 
1306
;; Sum reduction of a V2DF vector (SSE3): a single horizontal add of
;; operand 1 with itself, written straight into operand 0.
(define_expand "reduc_splus_v2df"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand:V2DF 1 "register_operand" "")]
  "TARGET_SSE3"
{
  emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
  DONE;
})
1314
 
1315
;; Sum reduction of a V8SF vector (AVX).  Emits four insns:
;;   tmp  = hadd (op1, op1)
;;   tmp2 = hadd (tmp, tmp)
;;   tmp  = vperm2f128 (tmp2, tmp2, 1) ; presumably swaps the 128-bit halves
;;   op0  = tmp + tmp2
;; Note that tmp is deliberately reused as the destination of the permute.
(define_expand "reduc_splus_v8sf"
  [(match_operand:V8SF 0 "register_operand" "")
   (match_operand:V8SF 1 "register_operand" "")]
  "TARGET_AVX"
{
  rtx tmp = gen_reg_rtx (V8SFmode);
  rtx tmp2 = gen_reg_rtx (V8SFmode);
  emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
  emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
  emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
  emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
  DONE;
})
1328
 
1329
;; Sum reduction of a V4SF vector.  With SSE3 this is two chained
;; horizontal adds; without it the generic ix86_expand_reduc helper is
;; used with gen_addv4sf3 as the combining operation.
(define_expand "reduc_splus_v4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")]
  "TARGET_SSE"
{
  if (TARGET_SSE3)
    {
      rtx tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
      emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
    }
  else
    ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
  DONE;
})
1344
 
1345
;; Modes handled by reduc_sm{in,ax}* patterns.  Each mode is enabled only
;; under the target condition listed next to it: the 256-bit integer
;; modes need AVX2, the 256-bit float modes need AVX, and V4SF needs SSE.
(define_mode_iterator REDUC_SMINMAX_MODE
  [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
   (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
   (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
   (V4SF "TARGET_SSE")])
1351
 
1352
;; Signed min/max reduction over REDUC_SMINMAX_MODE.  The insn condition
;; is empty because each mode carries its own target condition in the
;; iterator.  Expansion is delegated to ix86_expand_reduc with the
;; matching element-wise min/max generator.
;; NOTE(review): "<code>" and "<mode>" restored in the pattern name and in
;; the gen_ function name; verify against upstream.
(define_expand "reduc_<code>_<mode>"
  [(smaxmin:REDUC_SMINMAX_MODE
     (match_operand:REDUC_SMINMAX_MODE 0 "register_operand" "")
     (match_operand:REDUC_SMINMAX_MODE 1 "register_operand" ""))]
  ""
{
  ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
  DONE;
})
1361
 
1362
;; Unsigned min/max reduction over the VI_256 modes (AVX2).  Expansion is
;; delegated to ix86_expand_reduc with the matching element-wise
;; unsigned min/max generator.
;; NOTE(review): "<code>" and "<mode>" restored in the pattern name and in
;; the gen_ function name; verify against upstream.
(define_expand "reduc_<code>_<mode>"
  [(umaxmin:VI_256
     (match_operand:VI_256 0 "register_operand" "")
     (match_operand:VI_256 1 "register_operand" ""))]
  "TARGET_AVX2"
{
  ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
  DONE;
})
1371
 
1372
;; Unsigned minimum reduction of a V8HI vector (SSE4.1), delegated to
;; ix86_expand_reduc with gen_uminv8hi3 as the combining operation.
(define_expand "reduc_umin_v8hi"
  [(umin:V8HI
     (match_operand:V8HI 0 "register_operand" "")
     (match_operand:V8HI 1 "register_operand" ""))]
  "TARGET_SSE4_1"
{
  ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
  DONE;
})
1381
 
1382
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel floating point comparisons
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1387
 
1388
;; AVX vector compare with an explicit predicate immediate (operand 3,
;; range 0..31), represented as UNSPEC_PCMP.
;; NOTE(review): "<mode>", "<ssemodesuffix>" and "<MODE>" restored; verify
;; against upstream.
(define_insn "avx_cmp<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x")
        (unspec:VF
          [(match_operand:VF 1 "register_operand" "x")
           (match_operand:VF 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_31_operand" "n")]
          UNSPEC_PCMP))]
  "TARGET_AVX"
  "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])
1401
 
1402
;; AVX scalar compare with an explicit predicate immediate (operand 3,
;; range 0..31): element 0 is the UNSPEC_PCMP result, the remaining
;; elements are copied from operand 1 (vec_merge with mask 1).
;; NOTE(review): "<mode>", "<ssescalarmodesuffix>" and "<ssescalarmode>"
;; restored; verify against upstream.
(define_insn "avx_vmcmp<mode>3"
  [(set (match_operand:VF_128 0 "register_operand" "=x")
        (vec_merge:VF_128
          (unspec:VF_128
            [(match_operand:VF_128 1 "register_operand" "x")
             (match_operand:VF_128 2 "nonimmediate_operand" "xm")
             (match_operand:SI 3 "const_0_to_31_operand" "n")]
            UNSPEC_PCMP)
         (match_dup 1)
         (const_int 1)))]
  "TARGET_AVX"
  "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<ssescalarmode>")])
1418
 
1419
;; Mask-producing vector compare for commutative comparison codes only
;; (the insn condition requires RTX_COMM_COMPARE), which allows operands 1
;; and 2 to be marked commutative ("%").
;; NOTE(review): "<sse>", "<mode>", "<ssemodesuffix>" and "<MODE>" restored;
;; verify against upstream.
(define_insn "*<sse>_maskcmp<mode>3_comm"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (match_operator:VF 3 "sse_comparison_operator"
          [(match_operand:VF 1 "register_operand" "%0,x")
           (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
  "TARGET_SSE
   && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
  "@
   cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
   vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssecmp")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])
1434
 
1435
;; Mask-producing vector compare for any sse_comparison_operator.  The
;; comparison code is printed into the mnemonic through %D3.
;; NOTE(review): "<sse>", "<mode>", "<ssemodesuffix>" and "<MODE>" restored;
;; verify against upstream.
(define_insn "<sse>_maskcmp<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (match_operator:VF 3 "sse_comparison_operator"
          [(match_operand:VF 1 "register_operand" "0,x")
           (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
  "TARGET_SSE"
  "@
   cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
   vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssecmp")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])
1449
 
1450
;; Scalar mask-producing compare on a 128-bit vector: element 0 is the
;; comparison result, the remaining elements are copied from operand 1
;; (vec_merge with mask 1).
;; NOTE(review): "<sse>", "<mode>", "<ssescalarmodesuffix>" and
;; "<ssescalarmode>" restored; verify against upstream.
(define_insn "<sse>_vmmaskcmp<mode>3"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
         (match_operator:VF_128 3 "sse_comparison_operator"
           [(match_operand:VF_128 1 "register_operand" "0,x")
            (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
         (match_dup 1)
         (const_int 1)))]
  "TARGET_SSE"
  "@
   cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
   vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssecmp")
   (set_attr "length_immediate" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<ssescalarmode>")])
1467
 
1468
;; Scalar compare setting FLAGS_REG in CCFP mode: compares element 0 of
;; vector operand 0 with element 0 of vector operand 1.  prefix_data16 is
;; 1 only for the DF variant.
;; NOTE(review): "<sse>", "<ssevecmode>", "<MODE>mode", "<ssemodesuffix>"
;; and "<MODE>" restored; verify against upstream.
(define_insn "<sse>_comi"
  [(set (reg:CCFP FLAGS_REG)
        (compare:CCFP
          (vec_select:MODEF
            (match_operand:<ssevecmode> 0 "register_operand" "x")
            (parallel [(const_int 0)]))
          (vec_select:MODEF
            (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "SSE_FLOAT_MODE_P (<MODE>mode)"
  "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_rep" "0")
   (set (attr "prefix_data16")
        (if_then_else (eq_attr "mode" "DF")
                      (const_string "1")
                      (const_string "0")))
   (set_attr "mode" "<MODE>")])
1487
 
1488
;; Scalar compare setting FLAGS_REG in CCFPU mode: compares element 0 of
;; vector operand 0 with element 0 of vector operand 1.  Same shape as
;; the comi pattern but with the CCFPU flags mode and the ucomi mnemonic.
;; NOTE(review): "<sse>", "<ssevecmode>", "<MODE>mode", "<ssemodesuffix>"
;; and "<MODE>" restored; verify against upstream.
(define_insn "<sse>_ucomi"
  [(set (reg:CCFPU FLAGS_REG)
        (compare:CCFPU
          (vec_select:MODEF
            (match_operand:<ssevecmode> 0 "register_operand" "x")
            (parallel [(const_int 0)]))
          (vec_select:MODEF
            (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "SSE_FLOAT_MODE_P (<MODE>mode)"
  "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_rep" "0")
   (set (attr "prefix_data16")
        (if_then_else (eq_attr "mode" "DF")
                      (const_string "1")
                      (const_string "0")))
   (set_attr "mode" "<MODE>")])
1507
 
1508
;; 256-bit vector conditional select driven by a floating-point compare:
;; op0 = (op4 <cmp3> op5) ? op1 : op2.  The data mode (V_256) and the
;; comparison mode (VF_256) iterate independently, so the condition
;; requires both to have the same number of elements.  Expansion is done
;; by ix86_expand_fp_vcond, which is asserted to succeed.
;; NOTE(review): the two-iterator name and the "<V_256:MODE>mode" /
;; "<VF_256:MODE>mode" arguments were restored; verify against upstream.
(define_expand "vcond<V_256:mode><VF_256:mode>"
  [(set (match_operand:V_256 0 "register_operand" "")
        (if_then_else:V_256
          (match_operator 3 ""
            [(match_operand:VF_256 4 "nonimmediate_operand" "")
             (match_operand:VF_256 5 "nonimmediate_operand" "")])
          (match_operand:V_256 1 "general_operand" "")
          (match_operand:V_256 2 "general_operand" "")))]
  "TARGET_AVX
   && (GET_MODE_NUNITS (<V_256:MODE>mode)
       == GET_MODE_NUNITS (<VF_256:MODE>mode))"
{
  bool ok = ix86_expand_fp_vcond (operands);
  gcc_assert (ok);
  DONE;
})
1524
 
1525
;; 128-bit vector conditional select driven by a floating-point compare:
;; op0 = (op4 <cmp3> op5) ? op1 : op2.  The data mode (V_128) and the
;; comparison mode (VF_128) iterate independently, so the condition
;; requires both to have the same number of elements.  Expansion is done
;; by ix86_expand_fp_vcond, which is asserted to succeed.
;; NOTE(review): the two-iterator name and the "<V_128:MODE>mode" /
;; "<VF_128:MODE>mode" arguments were restored; verify against upstream.
(define_expand "vcond<V_128:mode><VF_128:mode>"
  [(set (match_operand:V_128 0 "register_operand" "")
        (if_then_else:V_128
          (match_operator 3 ""
            [(match_operand:VF_128 4 "nonimmediate_operand" "")
             (match_operand:VF_128 5 "nonimmediate_operand" "")])
          (match_operand:V_128 1 "general_operand" "")
          (match_operand:V_128 2 "general_operand" "")))]
  "TARGET_SSE
   && (GET_MODE_NUNITS (<V_128:MODE>mode)
       == GET_MODE_NUNITS (<VF_128:MODE>mode))"
{
  bool ok = ix86_expand_fp_vcond (operands);
  gcc_assert (ok);
  DONE;
})
1541
 
1542
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel floating point logical operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1547
 
1548
;; Vector and-not: op0 = ~op1 & op2 over the VF modes.  The mnemonic is
;; built at output time: the suffix is forced to "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set, otherwise the mode's own
;; suffix is used.
;; NOTE(review): "<sse>", "<mode>", "<ssemodesuffix>" and "<MODE>" restored;
;; verify against upstream.
(define_insn "<sse>_andnot<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (and:VF
          (not:VF
            (match_operand:VF 1 "register_operand" "0,x"))
          (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE"
{
  /* Static because the returned template must outlive this function.
     32 bytes is exactly enough for the longest (AVX, "ps"/"pd") form.  */
  static char buf[32];
  const char *insn;
  const char *suffix
    = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";

  switch (which_alternative)
    {
    case 0:
      insn = "andn%s\t{%%2, %%0|%%0, %%2}";
      break;
    case 1:
      insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
      break;
    default:
      gcc_unreachable ();
    }

  snprintf (buf, sizeof (buf), insn, suffix);
  return buf;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])
1580
 
1581
;; Standard-name and/ior/xor expander for the VF modes.  The operands are
;; legitimized with ix86_fixup_binary_operands_no_copy before the RTL
;; template is emitted.
;; NOTE(review): "<code>", "<mode>", "<CODE>" and "<MODE>mode" restored;
;; verify against upstream.
(define_expand "<code><mode>3"
  [(set (match_operand:VF 0 "register_operand" "")
        (any_logic:VF
          (match_operand:VF 1 "nonimmediate_operand" "")
          (match_operand:VF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1588
 
1589
;; Vector and/ior/xor insn over the VF modes; operands 1 and 2 are
;; commutative.  The mnemonic is built at output time: the suffix is
;; forced to "ps" when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set,
;; otherwise the mode's own suffix is used.
;; NOTE(review): "<code>", "<mode>", "<CODE>", "<MODE>", "<logic>" and
;; "<ssemodesuffix>" restored; verify against upstream.
(define_insn "*<code><mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (any_logic:VF
          (match_operand:VF 1 "nonimmediate_operand" "%0,x")
          (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
{
  /* Static because the returned template must outlive this function.  */
  static char buf[32];
  const char *insn;
  const char *suffix
    = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";

  switch (which_alternative)
    {
    case 0:
      insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
      break;
    case 1:
      insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
      break;
    default:
      gcc_unreachable ();
    }

  snprintf (buf, sizeof (buf), insn, suffix);
  return buf;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])
1620
 
1621
;; Vector copysign over the VF modes, expanded as three logical insns:
;;   op4 = ~mask & op1
;;   op5 =  mask & op2
;;   op0 =  op4 | op5
;; where mask (operand 3) comes from ix86_build_signbit_mask and op4/op5
;; are fresh pseudo registers.
;; NOTE(review): "<mode>" / "<MODE>mode" restored; verify against upstream.
(define_expand "copysign<mode>3"
  [(set (match_dup 4)
        (and:VF
          (not:VF (match_dup 3))
          (match_operand:VF 1 "nonimmediate_operand" "")))
   (set (match_dup 5)
        (and:VF (match_dup 3)
                (match_operand:VF 2 "nonimmediate_operand" "")))
   (set (match_operand:VF 0 "register_operand" "")
        (ior:VF (match_dup 4) (match_dup 5)))]
  "TARGET_SSE"
{
  operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);

  operands[4] = gen_reg_rtx (<MODE>mode);
  operands[5] = gen_reg_rtx (<MODE>mode);
})
1638
 
1639
;; Also define scalar versions.  These are used for abs, neg, and
;; conditional move.  Using subregs into vector modes causes register
;; allocation lossage.  These patterns do not allow memory operands
;; because the native instructions read the full 128-bits.
1643
 
1644
;; Scalar-mode and-not: op0 = ~op1 & op2 over MODEF, register operands
;; only.  The mnemonic is built at output time: the suffix is forced to
;; "ps" when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set, otherwise the
;; packed suffix matching the scalar mode is used.
;; NOTE(review): "<mode>", "<MODE>mode", "<ssevecmodesuffix>" and
;; "<ssevecmode>" restored; verify against upstream.
(define_insn "*andnot<mode>3"
  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
        (and:MODEF
          (not:MODEF
            (match_operand:MODEF 1 "register_operand" "0,x"))
          (match_operand:MODEF 2 "register_operand" "x,x")))]
  "SSE_FLOAT_MODE_P (<MODE>mode)"
{
  /* Static because the returned template must outlive this function.  */
  static char buf[32];
  const char *insn;
  const char *suffix
    = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";

  switch (which_alternative)
    {
    case 0:
      insn = "andn%s\t{%%2, %%0|%%0, %%2}";
      break;
    case 1:
      insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
      break;
    default:
      gcc_unreachable ();
    }

  snprintf (buf, sizeof (buf), insn, suffix);
  return buf;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<ssevecmode>")])
1676
 
1677
;; Scalar-mode and/ior/xor over MODEF, register operands only; operands 1
;; and 2 are commutative.  The mnemonic is built at output time: the
;; suffix is forced to "ps" when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is
;; set, otherwise the packed suffix matching the scalar mode is used.
;; NOTE(review): "<code>", "<mode>", "<MODE>mode", "<logic>",
;; "<ssevecmodesuffix>" and "<ssevecmode>" restored; verify against
;; upstream.
(define_insn "*<code><mode>3"
  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
        (any_logic:MODEF
          (match_operand:MODEF 1 "register_operand" "%0,x")
          (match_operand:MODEF 2 "register_operand" "x,x")))]
  "SSE_FLOAT_MODE_P (<MODE>mode)"
{
  /* Static because the returned template must outlive this function.  */
  static char buf[32];
  const char *insn;
  const char *suffix
    = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";

  switch (which_alternative)
    {
    case 0:
      insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
      break;
    case 1:
      insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
      break;
    default:
      gcc_unreachable ();
    }

  snprintf (buf, sizeof (buf), insn, suffix);
  return buf;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<ssevecmode>")])
1708
 
1709
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; FMA4 floating point multiply/accumulate instructions.  This
;; includes the scalar version of the instructions as well as the
;; vector.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1716
 
1717
;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
;; combine to generate a multiply/add with two memory references.  We then
;; split this insn, into loading up the destination register with one of the
;; memory operations.  If we don't manage to split the insn, reload will
;; generate the appropriate moves.  The reason this is needed, is that combine
;; has already folded one of the memory references into both the multiply and
;; add insns, and it can't generate a new pseudo.  I.e.:
;;      (set (reg1) (mem (addr1)))
;;      (set (reg2) (mult (reg1) (mem (addr2))))
;;      (set (reg3) (plus (reg2) (mem (addr3))))
;;
;; ??? This is historic, pre-dating the gimple fma transformation.
;; We could now properly represent that only one memory operand is
;; allowed and not be penalized during optimization.
1731
 
1732
;; Intrinsic FMA operations.
1733
 
1734
;; The standard names for fma are only available with SSE math enabled.
;; op0 = op1 * op2 + op3 over FMAMODE; no preparation code.
;; NOTE(review): "<mode>" restored in the pattern name; verify against
;; upstream.
(define_expand "fma<mode>4"
  [(set (match_operand:FMAMODE 0 "register_operand")
        (fma:FMAMODE
          (match_operand:FMAMODE 1 "nonimmediate_operand")
          (match_operand:FMAMODE 2 "nonimmediate_operand")
          (match_operand:FMAMODE 3 "nonimmediate_operand")))]
  "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1742
 
1743
;; Standard-name fused multiply-subtract: op0 = op1 * op2 - op3 over
;; FMAMODE (the addend is negated inside the fma).  Requires SSE math.
;; NOTE(review): "<mode>" restored in the pattern name; verify against
;; upstream.
(define_expand "fms<mode>4"
  [(set (match_operand:FMAMODE 0 "register_operand")
        (fma:FMAMODE
          (match_operand:FMAMODE 1 "nonimmediate_operand")
          (match_operand:FMAMODE 2 "nonimmediate_operand")
          (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
  "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1750
 
1751
;; Standard-name negated fused multiply-add: op0 = -op1 * op2 + op3 over
;; FMAMODE (the first multiplicand is negated).  Requires SSE math.
;; NOTE(review): "<mode>" restored in the pattern name; verify against
;; upstream.
(define_expand "fnma<mode>4"
  [(set (match_operand:FMAMODE 0 "register_operand")
        (fma:FMAMODE
          (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
          (match_operand:FMAMODE 2 "nonimmediate_operand")
          (match_operand:FMAMODE 3 "nonimmediate_operand")))]
  "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1758
 
1759
;; Standard-name negated fused multiply-subtract: op0 = -op1 * op2 - op3
;; over FMAMODE (both the first multiplicand and the addend are negated).
;; Requires SSE math.
;; NOTE(review): "<mode>" restored in the pattern name; verify against
;; upstream.
(define_expand "fnms<mode>4"
  [(set (match_operand:FMAMODE 0 "register_operand")
        (fma:FMAMODE
          (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
          (match_operand:FMAMODE 2 "nonimmediate_operand")
          (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
  "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1766
 
1767
;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
;; Same RTL as the standard fma expander (op0 = op1 * op2 + op3), but the
;; condition only requires TARGET_FMA or TARGET_FMA4.
;; NOTE(review): "<mode>" restored in the pattern name; verify against
;; upstream.
(define_expand "fma4i_fmadd_<mode>"
  [(set (match_operand:FMAMODE 0 "register_operand")
        (fma:FMAMODE
          (match_operand:FMAMODE 1 "nonimmediate_operand")
          (match_operand:FMAMODE 2 "nonimmediate_operand")
          (match_operand:FMAMODE 3 "nonimmediate_operand")))]
  "TARGET_FMA || TARGET_FMA4")
1775
 
1776
;; FMA4 four-operand multiply-add: op0 = op1 * op2 + op3.  The two
;; alternatives allow a memory operand in either operand 3 or operand 2,
;; but never both; operands 1 and 2 are commutative.
;; NOTE(review): "<mode>", "<ssemodesuffix>" and "<MODE>" restored; verify
;; against upstream.
(define_insn "*fma4i_fmadd_<mode>"
  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
        (fma:FMAMODE
          (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
          (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
          (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
  "TARGET_FMA4"
  "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
1786
 
1787
;; FMA4 instruction pattern for op0 = op1 * op2 - op3 (op3 appears
;; negated inside the fma).  Alternative 0 allows op3 in memory,
;; alternative 1 allows op2 in memory.
;; NOTE(review): name suffix, mnemonic suffix and the "mode" attribute
;; look truncated (empty string) -- confirm against the upstream file.
(define_insn "*fma4i_fmsub_"
1788
  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1789
        (fma:FMAMODE
1790
          (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1791
          (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1792
          (neg:FMAMODE
1793
            (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1794
  "TARGET_FMA4"
1795
  "vfmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1796
  [(set_attr "type" "ssemuladd")
1797
   (set_attr "mode" "")])
1798
 
1799
;; FMA4 instruction pattern for op0 = -(op1 * op2) + op3 (op1 appears
;; negated inside the fma).  Alternative 0 allows op3 in memory,
;; alternative 1 allows op2 in memory.
;; NOTE(review): name suffix, mnemonic suffix and the "mode" attribute
;; look truncated (empty string) -- confirm against the upstream file.
(define_insn "*fma4i_fnmadd_"
1800
  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1801
        (fma:FMAMODE
1802
          (neg:FMAMODE
1803
            (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1804
          (match_operand:FMAMODE   2 "nonimmediate_operand" " x,m")
1805
          (match_operand:FMAMODE   3 "nonimmediate_operand" "xm,x")))]
1806
  "TARGET_FMA4"
1807
  "vfnmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1808
  [(set_attr "type" "ssemuladd")
1809
   (set_attr "mode" "")])
1810
 
1811
;; FMA4 instruction pattern for op0 = -(op1 * op2) - op3 (both op1 and
;; op3 appear negated inside the fma).  Alternative 0 allows op3 in
;; memory, alternative 1 allows op2 in memory.
;; NOTE(review): name suffix, mnemonic suffix and the "mode" attribute
;; look truncated (empty string) -- confirm against the upstream file.
(define_insn "*fma4i_fnmsub_"
1812
  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1813
        (fma:FMAMODE
1814
          (neg:FMAMODE
1815
            (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1816
          (match_operand:FMAMODE   2 "nonimmediate_operand" " x,m")
1817
          (neg:FMAMODE
1818
            (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1819
  "TARGET_FMA4"
1820
  "vfnmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1821
  [(set_attr "type" "ssemuladd")
1822
   (set_attr "mode" "")])
1823
 
1824
;; Scalar versions of the above.  Unlike ADDSS et al, these write the
1825
;; entire destination register, with the high-order elements zeroed.
1826
 
1827
;; Expander for the FMA4 scalar multiply-add on a 128-bit vector:
;; element 0 of op0 receives fma (op1, op2, op3) (vec_merge mask 1) and
;; the remaining elements come from operand 4, which the preparation
;; statement sets to the all-zero constant.
;; NOTE(review): the name suffix and the argument of CONST0_RTX look
;; truncated (bare "mode") -- confirm against the upstream file.
(define_expand "fma4i_vmfmadd_"
1828
  [(set (match_operand:VF_128 0 "register_operand")
1829
        (vec_merge:VF_128
1830
          (fma:VF_128
1831
            (match_operand:VF_128 1 "nonimmediate_operand")
1832
            (match_operand:VF_128 2 "nonimmediate_operand")
1833
            (match_operand:VF_128 3 "nonimmediate_operand"))
1834
          (match_dup 4)
1835
          (const_int 1)))]
1836
  "TARGET_FMA4"
1837
{
1838
  operands[4] = CONST0_RTX (mode);
1839
})
1840
 
1841
;; Expander for the FMA (three-operand) scalar multiply-add on a 128-bit
;; vector: element 0 of op0 receives fma (op1, op2, op3) and the
;; remaining elements are taken from op0 itself (match_dup 0), i.e. they
;; are left unchanged.
;; NOTE(review): the name ends in "_" -- a trailing iterator suffix was
;; presumably lost in extraction; confirm against the upstream file.
(define_expand "fmai_vmfmadd_"
1842
  [(set (match_operand:VF_128 0 "register_operand")
1843
        (vec_merge:VF_128
1844
          (fma:VF_128
1845
            (match_operand:VF_128 1 "nonimmediate_operand")
1846
            (match_operand:VF_128 2 "nonimmediate_operand")
1847
            (match_operand:VF_128 3 "nonimmediate_operand"))
1848
          (match_dup 0)
1849
          (const_int 1)))]
1850
  "TARGET_FMA")
1851
 
1852
;; FMA scalar multiply-add: element 0 of op0 = op1 * op2 + op3, other
;; elements of op0 are kept (match_dup 0).  One input is always tied to
;; the destination: alternatives 0 and 1 tie op1 (132 and 213 forms),
;; alternative 2 ties op3 (231 form).  At most one input may be memory.
;; NOTE(review): name suffix, mnemonic suffixes and the "mode" attribute
;; look truncated (empty string) -- confirm against the upstream file.
(define_insn "*fmai_fmadd_"
1853
  [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1854
        (vec_merge:VF_128
1855
          (fma:VF_128
1856
            (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1857
            (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1858
            (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1859
          (match_dup 0)
1860
          (const_int 1)))]
1861
  "TARGET_FMA"
1862
  "@
1863
   vfmadd132\t{%2, %3, %0|%0, %3, %2}
1864
   vfmadd213\t{%3, %2, %0|%0, %2, %3}
1865
   vfmadd231\t{%2, %1, %0|%0, %1, %2}"
1866
  [(set_attr "type" "ssemuladd")
1867
   (set_attr "mode" "")])
1868
 
1869
;; FMA scalar multiply-subtract: element 0 of op0 = op1 * op2 - op3,
;; other elements of op0 are kept (match_dup 0).  Alternatives 0 and 1
;; tie op1 to the destination (132 and 213 forms), alternative 2 ties
;; op3 (231 form).
;; NOTE(review): name suffix, mnemonic suffixes and the "mode" attribute
;; look truncated (empty string) -- confirm against the upstream file.
(define_insn "*fmai_fmsub_"
1870
  [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1871
        (vec_merge:VF_128
1872
          (fma:VF_128
1873
            (match_operand:VF_128   1 "nonimmediate_operand" "%0, 0,x")
1874
            (match_operand:VF_128   2 "nonimmediate_operand" "xm, x,xm")
1875
            (neg:VF_128
1876
              (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1877
          (match_dup 0)
1878
          (const_int 1)))]
1879
  "TARGET_FMA"
1880
  "@
1881
   vfmsub132\t{%2, %3, %0|%0, %3, %2}
1882
   vfmsub213\t{%3, %2, %0|%0, %2, %3}
1883
   vfmsub231\t{%2, %1, %0|%0, %1, %2}"
1884
  [(set_attr "type" "ssemuladd")
1885
   (set_attr "mode" "")])
1886
 
1887
;; FMA scalar negated multiply-add: element 0 of op0 = -(op1 * op2) + op3,
;; other elements of op0 are kept (match_dup 0).  Alternatives 0 and 1
;; tie op1 to the destination (132 and 213 forms), alternative 2 ties
;; op3 (231 form).
;; NOTE(review): name suffix, mnemonic suffixes and the "mode" attribute
;; look truncated (empty string) -- confirm against the upstream file.
(define_insn "*fmai_fnmadd_"
1888
  [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1889
        (vec_merge:VF_128
1890
          (fma:VF_128
1891
            (neg:VF_128
1892
              (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1893
            (match_operand:VF_128   2 "nonimmediate_operand" "xm, x,xm")
1894
            (match_operand:VF_128   3 "nonimmediate_operand" " x,xm,0"))
1895
          (match_dup 0)
1896
          (const_int 1)))]
1897
  "TARGET_FMA"
1898
  "@
1899
   vfnmadd132\t{%2, %3, %0|%0, %3, %2}
1900
   vfnmadd213\t{%3, %2, %0|%0, %2, %3}
1901
   vfnmadd231\t{%2, %1, %0|%0, %1, %2}"
1902
  [(set_attr "type" "ssemuladd")
1903
   (set_attr "mode" "")])
1904
 
1905
;; FMA scalar negated multiply-subtract: element 0 of
;; op0 = -(op1 * op2) - op3, other elements of op0 are kept (match_dup 0).
;; Alternatives 0 and 1 tie op1 to the destination (132 and 213 forms),
;; alternative 2 ties op3 (231 form).
;; NOTE(review): name suffix, mnemonic suffixes and the "mode" attribute
;; look truncated (empty string) -- confirm against the upstream file.
(define_insn "*fmai_fnmsub_"
1906
  [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1907
        (vec_merge:VF_128
1908
          (fma:VF_128
1909
            (neg:VF_128
1910
              (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1911
            (match_operand:VF_128   2 "nonimmediate_operand" "xm, x,xm")
1912
            (neg:VF_128
1913
              (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1914
          (match_dup 0)
1915
          (const_int 1)))]
1916
  "TARGET_FMA"
1917
  "@
1918
   vfnmsub132\t{%2, %3, %0|%0, %3, %2}
1919
   vfnmsub213\t{%3, %2, %0|%0, %2, %3}
1920
   vfnmsub231\t{%2, %1, %0|%0, %1, %2}"
1921
  [(set_attr "type" "ssemuladd")
1922
   (set_attr "mode" "")])
1923
 
1924
;; FMA4 scalar multiply-add: element 0 of op0 = op1 * op2 + op3; the
;; remaining elements come from operand 4, which must be the zero
;; constant (const0_operand).  Alternative 0 allows op3 in memory,
;; alternative 1 allows op2 in memory.
;; NOTE(review): name suffix, mnemonic suffix and the "mode" attribute
;; look truncated (empty string) -- confirm against the upstream file.
(define_insn "*fma4i_vmfmadd_"
1925
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1926
        (vec_merge:VF_128
1927
          (fma:VF_128
1928
            (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1929
            (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1930
            (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1931
          (match_operand:VF_128 4 "const0_operand" "")
1932
          (const_int 1)))]
1933
  "TARGET_FMA4"
1934
  "vfmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1935
  [(set_attr "type" "ssemuladd")
1936
   (set_attr "mode" "")])
1937
 
1938
;; FMA4 scalar multiply-subtract: element 0 of op0 = op1 * op2 - op3; the
;; remaining elements come from operand 4, which must be the zero
;; constant (const0_operand).  Alternative 0 allows op3 in memory,
;; alternative 1 allows op2 in memory.
;; NOTE(review): name suffix, mnemonic suffix and the "mode" attribute
;; look truncated (empty string) -- confirm against the upstream file.
(define_insn "*fma4i_vmfmsub_"
1939
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1940
        (vec_merge:VF_128
1941
          (fma:VF_128
1942
            (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1943
            (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1944
            (neg:VF_128
1945
              (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1946
          (match_operand:VF_128 4 "const0_operand" "")
1947
          (const_int 1)))]
1948
  "TARGET_FMA4"
1949
  "vfmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1950
  [(set_attr "type" "ssemuladd")
1951
   (set_attr "mode" "")])
1952
 
1953
;; FMA4 scalar negated multiply-add: element 0 of
;; op0 = -(op1 * op2) + op3; the remaining elements come from operand 4,
;; which must be the zero constant (const0_operand).  Alternative 0
;; allows op3 in memory, alternative 1 allows op2 in memory.
;; NOTE(review): name suffix, mnemonic suffix and the "mode" attribute
;; look truncated (empty string) -- confirm against the upstream file.
(define_insn "*fma4i_vmfnmadd_"
1954
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1955
        (vec_merge:VF_128
1956
          (fma:VF_128
1957
            (neg:VF_128
1958
              (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1959
            (match_operand:VF_128   2 "nonimmediate_operand" " x,m")
1960
            (match_operand:VF_128   3 "nonimmediate_operand" "xm,x"))
1961
          (match_operand:VF_128 4 "const0_operand" "")
1962
          (const_int 1)))]
1963
  "TARGET_FMA4"
1964
  "vfnmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1965
  [(set_attr "type" "ssemuladd")
1966
   (set_attr "mode" "")])
1967
 
1968
;; FMA4 scalar negated multiply-subtract: element 0 of
;; op0 = -(op1 * op2) - op3; the remaining elements come from operand 4,
;; which must be the zero constant (const0_operand).  Alternative 0
;; allows op3 in memory, alternative 1 allows op2 in memory.
;; NOTE(review): name suffix, mnemonic suffix and the "mode" attribute
;; look truncated (empty string) -- confirm against the upstream file.
(define_insn "*fma4i_vmfnmsub_"
1969
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1970
        (vec_merge:VF_128
1971
          (fma:VF_128
1972
            (neg:VF_128
1973
              (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1974
            (match_operand:VF_128   2 "nonimmediate_operand" " x,m")
1975
            (neg:VF_128
1976
              (match_operand:VF_128   3 "nonimmediate_operand" "xm,x")))
1977
          (match_operand:VF_128 4 "const0_operand" "")
1978
          (const_int 1)))]
1979
  "TARGET_FMA4"
1980
  "vfnmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1981
  [(set_attr "type" "ssemuladd")
1982
   (set_attr "mode" "")])
1983
 
1984
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1985
;;
1986
;; FMA4 Parallel floating point multiply addsub and subadd operations.
1987
;;
1988
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1989
 
1990
;; It would be possible to represent these without the UNSPEC as
1991
;;
1992
;; (vec_merge
1993
;;   (fma op1 op2 op3)
1994
;;   (fma op1 op2 (neg op3))
1995
;;   (merge-const))
1996
;;
1997
;; But this doesn't seem useful in practice.
1998
 
1999
;; Expander for the packed multiply add/sub operation, represented as
;; UNSPEC_FMADDSUB applied to three vector operands.  Enabled when
;; either FMA or FMA4 is available.
;; NOTE(review): the name ends in "_" -- a trailing iterator suffix was
;; presumably lost in extraction; confirm against the upstream file.
(define_expand "fmaddsub_"
2000
  [(set (match_operand:VF 0 "register_operand")
2001
        (unspec:VF
2002
          [(match_operand:VF 1 "nonimmediate_operand")
2003
           (match_operand:VF 2 "nonimmediate_operand")
2004
           (match_operand:VF 3 "nonimmediate_operand")]
2005
          UNSPEC_FMADDSUB))]
2006
  "TARGET_FMA || TARGET_FMA4")
2007
 
2008
;; FMA4 instruction pattern matching UNSPEC_FMADDSUB on (op1, op2, op3).
;; Four-operand form: alternative 0 allows op3 in memory, alternative 1
;; allows op2 in memory; "%" marks op1/op2 as commutative.
;; NOTE(review): name suffix, mnemonic suffix and the "mode" attribute
;; look truncated (empty string) -- confirm against the upstream file.
(define_insn "*fma4_fmaddsub_"
2009
  [(set (match_operand:VF 0 "register_operand" "=x,x")
2010
        (unspec:VF
2011
          [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
2012
           (match_operand:VF 2 "nonimmediate_operand" " x,m")
2013
           (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
2014
          UNSPEC_FMADDSUB))]
2015
  "TARGET_FMA4"
2016
  "vfmaddsub\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2017
  [(set_attr "type" "ssemuladd")
2018
   (set_attr "mode" "")])
2019
 
2020
;; FMA4 instruction pattern for the sub/add variant: the same
;; UNSPEC_FMADDSUB, but with op3 negated inside the unspec, emitted as
;; vfmsubadd.  Alternative 0 allows op3 in memory, alternative 1 allows
;; op2 in memory.
;; NOTE(review): name suffix, mnemonic suffix and the "mode" attribute
;; look truncated (empty string) -- confirm against the upstream file.
(define_insn "*fma4_fmsubadd_"
2021
  [(set (match_operand:VF 0 "register_operand" "=x,x")
2022
        (unspec:VF
2023
          [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
2024
           (match_operand:VF 2 "nonimmediate_operand" " x,m")
2025
           (neg:VF
2026
             (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
2027
          UNSPEC_FMADDSUB))]
2028
  "TARGET_FMA4"
2029
  "vfmsubadd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2030
  [(set_attr "type" "ssemuladd")
2031
   (set_attr "mode" "")])
2032
 
2033
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2034
;;
2035
;; FMA3 floating point multiply/accumulate instructions.
2036
;;
2037
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2038
 
2039
;; FMA (three-operand) multiply-add: op0 = op1 * op2 + op3.  One input
;; is always tied to the destination: alternatives 0 and 1 tie op1
;; (132 and 213 forms), alternative 2 ties op3 (231 form).  At most one
;; input may be in memory.
;; NOTE(review): name suffix, mnemonic suffixes and the "mode" attribute
;; look truncated (empty string) -- confirm against the upstream file.
(define_insn "*fma_fmadd_"
2040
  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2041
        (fma:FMAMODE
2042
          (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
2043
          (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2044
          (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
2045
  "TARGET_FMA"
2046
  "@
2047
   vfmadd132\t{%2, %3, %0|%0, %3, %2}
2048
   vfmadd213\t{%3, %2, %0|%0, %2, %3}
2049
   vfmadd231\t{%2, %1, %0|%0, %1, %2}"
2050
  [(set_attr "type" "ssemuladd")
2051
   (set_attr "mode" "")])
2052
 
2053
;; FMA (three-operand) multiply-subtract: op0 = op1 * op2 - op3.
;; Alternatives 0 and 1 tie op1 to the destination (132 and 213 forms),
;; alternative 2 ties op3 (231 form).
;; NOTE(review): name suffix, mnemonic suffixes and the "mode" attribute
;; look truncated (empty string) -- confirm against the upstream file.
(define_insn "*fma_fmsub_"
2054
  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2055
        (fma:FMAMODE
2056
          (match_operand:FMAMODE   1 "nonimmediate_operand" "%0, 0,x")
2057
          (match_operand:FMAMODE   2 "nonimmediate_operand" "xm, x,xm")
2058
          (neg:FMAMODE
2059
            (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
2060
  "TARGET_FMA"
2061
  "@
2062
   vfmsub132\t{%2, %3, %0|%0, %3, %2}
2063
   vfmsub213\t{%3, %2, %0|%0, %2, %3}
2064
   vfmsub231\t{%2, %1, %0|%0, %1, %2}"
2065
  [(set_attr "type" "ssemuladd")
2066
   (set_attr "mode" "")])
2067
 
2068
;; FMA (three-operand) negated multiply-add: op0 = -(op1 * op2) + op3.
;; Alternatives 0 and 1 tie op1 to the destination (132 and 213 forms),
;; alternative 2 ties op3 (231 form).
;; NOTE(review): name suffix, mnemonic suffixes and the "mode" attribute
;; look truncated (empty string) -- confirm against the upstream file.
(define_insn "*fma_fnmadd_"
2069
  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2070
        (fma:FMAMODE
2071
          (neg:FMAMODE
2072
            (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
2073
          (match_operand:FMAMODE   2 "nonimmediate_operand" "xm, x,xm")
2074
          (match_operand:FMAMODE   3 "nonimmediate_operand" " x,xm,0")))]
2075
  "TARGET_FMA"
2076
  "@
2077
   vfnmadd132\t{%2, %3, %0|%0, %3, %2}
2078
   vfnmadd213\t{%3, %2, %0|%0, %2, %3}
2079
   vfnmadd231\t{%2, %1, %0|%0, %1, %2}"
2080
  [(set_attr "type" "ssemuladd")
2081
   (set_attr "mode" "")])
2082
 
2083
;; FMA (three-operand) negated multiply-subtract:
;; op0 = -(op1 * op2) - op3.  Alternatives 0 and 1 tie op1 to the
;; destination (132 and 213 forms), alternative 2 ties op3 (231 form).
;; NOTE(review): name suffix, mnemonic suffixes and the "mode" attribute
;; look truncated (empty string) -- confirm against the upstream file.
(define_insn "*fma_fnmsub_"
2084
  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2085
        (fma:FMAMODE
2086
          (neg:FMAMODE
2087
            (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
2088
          (match_operand:FMAMODE   2 "nonimmediate_operand" "xm, x,xm")
2089
          (neg:FMAMODE
2090
            (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
2091
  "TARGET_FMA"
2092
  "@
2093
   vfnmsub132\t{%2, %3, %0|%0, %3, %2}
2094
   vfnmsub213\t{%3, %2, %0|%0, %2, %3}
2095
   vfnmsub231\t{%2, %1, %0|%0, %1, %2}"
2096
  [(set_attr "type" "ssemuladd")
2097
   (set_attr "mode" "")])
2098
 
2099
;; FMA (three-operand) pattern matching UNSPEC_FMADDSUB on
;; (op1, op2, op3).  Alternatives 0 and 1 tie op1 to the destination
;; (132 and 213 forms), alternative 2 ties op3 (231 form).
;; NOTE(review): name suffix, mnemonic suffixes and the "mode" attribute
;; look truncated (empty string) -- confirm against the upstream file.
(define_insn "*fma_fmaddsub_"
2100
  [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2101
        (unspec:VF
2102
          [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2103
           (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2104
           (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
2105
          UNSPEC_FMADDSUB))]
2106
  "TARGET_FMA"
2107
  "@
2108
   vfmaddsub132\t{%2, %3, %0|%0, %3, %2}
2109
   vfmaddsub213\t{%3, %2, %0|%0, %2, %3}
2110
   vfmaddsub231\t{%2, %1, %0|%0, %1, %2}"
2111
  [(set_attr "type" "ssemuladd")
2112
   (set_attr "mode" "")])
2113
 
2114
;; FMA (three-operand) sub/add variant: UNSPEC_FMADDSUB with op3 negated
;; inside the unspec, emitted as vfmsubadd.  Alternatives 0 and 1 tie
;; op1 to the destination (132 and 213 forms), alternative 2 ties op3
;; (231 form).
;; NOTE(review): name suffix, mnemonic suffixes and the "mode" attribute
;; look truncated (empty string) -- confirm against the upstream file.
(define_insn "*fma_fmsubadd_"
2115
  [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2116
        (unspec:VF
2117
          [(match_operand:VF   1 "nonimmediate_operand" "%0, 0,x")
2118
           (match_operand:VF   2 "nonimmediate_operand" "xm, x,xm")
2119
           (neg:VF
2120
             (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
2121
          UNSPEC_FMADDSUB))]
2122
  "TARGET_FMA"
2123
  "@
2124
   vfmsubadd132\t{%2, %3, %0|%0, %3, %2}
2125
   vfmsubadd213\t{%3, %2, %0|%0, %2, %3}
2126
   vfmsubadd231\t{%2, %1, %0|%0, %1, %2}"
2127
  [(set_attr "type" "ssemuladd")
2128
   (set_attr "mode" "")])
2129
 
2130
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2131
;;
2132
;; Parallel single-precision floating point conversion operations
2133
;;
2134
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2135
 
2136
;; cvtpi2ps: convert the two SImode elements of op2 (MMX register or
;; memory) to SFmode and place them in elements 0 and 1 of op0
;; (vec_merge mask 3); elements 2 and 3 are taken from op1, which is
;; tied to the destination.
(define_insn "sse_cvtpi2ps"
2137
  [(set (match_operand:V4SF 0 "register_operand" "=x")
2138
        (vec_merge:V4SF
2139
          (vec_duplicate:V4SF
2140
            (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2141
          (match_operand:V4SF 1 "register_operand" "0")
2142
          (const_int 3)))]
2143
  "TARGET_SSE"
2144
  "cvtpi2ps\t{%2, %0|%0, %2}"
2145
  [(set_attr "type" "ssecvt")
2146
   (set_attr "mode" "V4SF")])
2147
 
2148
;; cvtps2pi: convert the low two SFmode elements of op1 (SSE register or
;; memory) to SImode and store them in an MMX register.  The conversion
;; is expressed with UNSPEC_FIX_NOTRUNC rather than the truncating
;; "fix" operator.
(define_insn "sse_cvtps2pi"
2149
  [(set (match_operand:V2SI 0 "register_operand" "=y")
2150
        (vec_select:V2SI
2151
          (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2152
                       UNSPEC_FIX_NOTRUNC)
2153
          (parallel [(const_int 0) (const_int 1)])))]
2154
  "TARGET_SSE"
2155
  "cvtps2pi\t{%1, %0|%0, %1}"
2156
  [(set_attr "type" "ssecvt")
2157
   (set_attr "unit" "mmx")
2158
   (set_attr "mode" "DI")])
2159
 
2160
;; cvttps2pi: truncating ("fix") conversion of the low two SFmode
;; elements of op1 (SSE register or memory) to SImode, result in an MMX
;; register.
(define_insn "sse_cvttps2pi"
2161
  [(set (match_operand:V2SI 0 "register_operand" "=y")
2162
        (vec_select:V2SI
2163
          (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2164
          (parallel [(const_int 0) (const_int 1)])))]
2165
  "TARGET_SSE"
2166
  "cvttps2pi\t{%1, %0|%0, %1}"
2167
  [(set_attr "type" "ssecvt")
2168
   (set_attr "unit" "mmx")
2169
   (set_attr "prefix_rep" "0")
2170
   (set_attr "mode" "SF")])
2171
 
2172
;; cvtsi2ss: convert the SImode integer op2 to SFmode and insert it into
;; element 0 of op0 (vec_merge mask 1); the other elements come from
;; op1.  Alternatives 0 and 1 are the non-AVX two-operand forms (op1
;; tied to op0; op2 in a general register or in memory respectively);
;; alternative 2 is the AVX three-operand form with op1 in any SSE
;; register.
(define_insn "sse_cvtsi2ss"
2173
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2174
        (vec_merge:V4SF
2175
          (vec_duplicate:V4SF
2176
            (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2177
          (match_operand:V4SF 1 "register_operand" "0,0,x")
2178
          (const_int 1)))]
2179
  "TARGET_SSE"
2180
  "@
2181
   cvtsi2ss\t{%2, %0|%0, %2}
2182
   cvtsi2ss\t{%2, %0|%0, %2}
2183
   vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2184
  [(set_attr "isa" "noavx,noavx,avx")
2185
   (set_attr "type" "sseicvt")
2186
   (set_attr "athlon_decode" "vector,double,*")
2187
   (set_attr "amdfam10_decode" "vector,double,*")
2188
   (set_attr "bdver1_decode" "double,direct,*")
2189
   (set_attr "prefix" "orig,orig,vex")
2190
   (set_attr "mode" "SF")])
2191
 
2192
;; cvtsi2ssq: 64-bit variant -- convert the DImode integer op2 to SFmode
;; and insert it into element 0 of op0; the other elements come from
;; op1.  Only available on 64-bit targets.  Alternatives 0 and 1 are the
;; non-AVX forms (op1 tied to op0), alternative 2 is the AVX
;; three-operand form.
(define_insn "sse_cvtsi2ssq"
2193
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2194
        (vec_merge:V4SF
2195
          (vec_duplicate:V4SF
2196
            (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2197
          (match_operand:V4SF 1 "register_operand" "0,0,x")
2198
          (const_int 1)))]
2199
  "TARGET_SSE && TARGET_64BIT"
2200
  "@
2201
   cvtsi2ssq\t{%2, %0|%0, %2}
2202
   cvtsi2ssq\t{%2, %0|%0, %2}
2203
   vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2204
  [(set_attr "isa" "noavx,noavx,avx")
2205
   (set_attr "type" "sseicvt")
2206
   (set_attr "athlon_decode" "vector,double,*")
2207
   (set_attr "amdfam10_decode" "vector,double,*")
2208
   (set_attr "bdver1_decode" "double,direct,*")
2209
   (set_attr "length_vex" "*,*,4")
2210
   (set_attr "prefix_rex" "1,1,*")
2211
   (set_attr "prefix" "orig,orig,vex")
2212
   (set_attr "mode" "SF")])
2213
 
2214
;; cvtss2si: convert element 0 of the V4SF operand (SSE register or
;; memory) to an SImode integer in a general register.  The conversion
;; is expressed with UNSPEC_FIX_NOTRUNC rather than the truncating
;; "fix" operator.
(define_insn "sse_cvtss2si"
2215
  [(set (match_operand:SI 0 "register_operand" "=r,r")
2216
        (unspec:SI
2217
          [(vec_select:SF
2218
             (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2219
             (parallel [(const_int 0)]))]
2220
          UNSPEC_FIX_NOTRUNC))]
2221
  "TARGET_SSE"
2222
  "%vcvtss2si\t{%1, %0|%0, %1}"
2223
  [(set_attr "type" "sseicvt")
2224
   (set_attr "athlon_decode" "double,vector")
2225
   (set_attr "bdver1_decode" "double,double")
2226
   (set_attr "prefix_rep" "1")
2227
   (set_attr "prefix" "maybe_vex")
2228
   (set_attr "mode" "SI")])
2229
 
2230
;; Variant of cvtss2si whose input is a plain SFmode scalar (SSE
;; register or memory) instead of element 0 of a vector; the result is
;; an SImode integer via UNSPEC_FIX_NOTRUNC.
(define_insn "sse_cvtss2si_2"
2231
  [(set (match_operand:SI 0 "register_operand" "=r,r")
2232
        (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2233
                   UNSPEC_FIX_NOTRUNC))]
2234
  "TARGET_SSE"
2235
  "%vcvtss2si\t{%1, %0|%0, %1}"
2236
  [(set_attr "type" "sseicvt")
2237
   (set_attr "athlon_decode" "double,vector")
2238
   (set_attr "amdfam10_decode" "double,double")
2239
   (set_attr "bdver1_decode" "double,double")
2240
   (set_attr "prefix_rep" "1")
2241
   (set_attr "prefix" "maybe_vex")
2242
   (set_attr "mode" "SI")])
2243
 
2244
;; cvtss2si with a 64-bit result: convert element 0 of the V4SF operand
;; to a DImode integer via UNSPEC_FIX_NOTRUNC.  Only available on 64-bit
;; targets.
(define_insn "sse_cvtss2siq"
2245
  [(set (match_operand:DI 0 "register_operand" "=r,r")
2246
        (unspec:DI
2247
          [(vec_select:SF
2248
             (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2249
             (parallel [(const_int 0)]))]
2250
          UNSPEC_FIX_NOTRUNC))]
2251
  "TARGET_SSE && TARGET_64BIT"
2252
  "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2253
  [(set_attr "type" "sseicvt")
2254
   (set_attr "athlon_decode" "double,vector")
2255
   (set_attr "bdver1_decode" "double,double")
2256
   (set_attr "prefix_rep" "1")
2257
   (set_attr "prefix" "maybe_vex")
2258
   (set_attr "mode" "DI")])
2259
 
2260
;; Variant of the 64-bit cvtss2si whose input is a plain SFmode scalar
;; (SSE register or memory); the result is a DImode integer via
;; UNSPEC_FIX_NOTRUNC.  Only available on 64-bit targets.
(define_insn "sse_cvtss2siq_2"
2261
  [(set (match_operand:DI 0 "register_operand" "=r,r")
2262
        (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2263
                   UNSPEC_FIX_NOTRUNC))]
2264
  "TARGET_SSE && TARGET_64BIT"
2265
  "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2266
  [(set_attr "type" "sseicvt")
2267
   (set_attr "athlon_decode" "double,vector")
2268
   (set_attr "amdfam10_decode" "double,double")
2269
   (set_attr "bdver1_decode" "double,double")
2270
   (set_attr "prefix_rep" "1")
2271
   (set_attr "prefix" "maybe_vex")
2272
   (set_attr "mode" "DI")])
2273
 
2274
;; cvttss2si: truncating ("fix") conversion of element 0 of the V4SF
;; operand (SSE register or memory) to an SImode integer in a general
;; register.
(define_insn "sse_cvttss2si"
2275
  [(set (match_operand:SI 0 "register_operand" "=r,r")
2276
        (fix:SI
2277
          (vec_select:SF
2278
            (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2279
            (parallel [(const_int 0)]))))]
2280
  "TARGET_SSE"
2281
  "%vcvttss2si\t{%1, %0|%0, %1}"
2282
  [(set_attr "type" "sseicvt")
2283
   (set_attr "athlon_decode" "double,vector")
2284
   (set_attr "amdfam10_decode" "double,double")
2285
   (set_attr "bdver1_decode" "double,double")
2286
   (set_attr "prefix_rep" "1")
2287
   (set_attr "prefix" "maybe_vex")
2288
   (set_attr "mode" "SI")])
2289
 
2290
;; cvttss2si with a 64-bit result: truncating ("fix") conversion of
;; element 0 of the V4SF operand to a DImode integer.  Only available on
;; 64-bit targets.
(define_insn "sse_cvttss2siq"
2291
  [(set (match_operand:DI 0 "register_operand" "=r,r")
2292
        (fix:DI
2293
          (vec_select:SF
2294
            (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2295
            (parallel [(const_int 0)]))))]
2296
  "TARGET_SSE && TARGET_64BIT"
2297
  "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2298
  [(set_attr "type" "sseicvt")
2299
   (set_attr "athlon_decode" "double,vector")
2300
   (set_attr "amdfam10_decode" "double,double")
2301
   (set_attr "bdver1_decode" "double,double")
2302
   (set_attr "prefix_rep" "1")
2303
   (set_attr "prefix" "maybe_vex")
2304
   (set_attr "mode" "DI")])
2305
 
2306
;; cvtdq2ps: integer-to-float conversion of a whole vector (op1 in an
;; SSE register or memory) into a single-precision vector op0.
;; NOTE(review): the pattern name, the machine mode of operand 1
;; ("match_operand:" with nothing after the colon) and the "mode"
;; attribute all look truncated -- iterator text was presumably lost in
;; extraction; confirm against the upstream file.
(define_insn "float2"
2307
  [(set (match_operand:VF1 0 "register_operand" "=x")
2308
        (float:VF1
2309
          (match_operand: 1 "nonimmediate_operand" "xm")))]
2310
  "TARGET_SSE2"
2311
  "%vcvtdq2ps\t{%1, %0|%0, %1}"
2312
  [(set_attr "type" "ssecvt")
2313
   (set_attr "prefix" "maybe_vex")
2314
   (set_attr "mode" "")])
2315
 
2316
;; Expander for unsigned integer vector -> single-precision vector
;; conversion.  All work is delegated to
;; ix86_expand_vector_convert_uns_vsivsf (op0, op1); no RTL template is
;; emitted (DONE).  Enabled for V4SFmode with SSE2, or for any mode of
;; the iterator when AVX2 is available.
;; NOTE(review): the pattern name, the mode of operand 1 and the bare
;; "mode" in the condition look truncated -- confirm against upstream.
(define_expand "floatuns2"
2317
  [(match_operand:VF1 0 "register_operand" "")
2318
   (match_operand: 1 "register_operand" "")]
2319
  "TARGET_SSE2 && (mode == V4SFmode || TARGET_AVX2)"
2320
{
2321
  ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
2322
  DONE;
2323
})
2324
 
2325
;; 256-bit vcvtps2dq: convert eight SFmode elements (SSE register or
;; memory) to eight SImode integers.  The conversion is expressed with
;; UNSPEC_FIX_NOTRUNC rather than the truncating "fix" operator.
(define_insn "avx_cvtps2dq256"
2326
  [(set (match_operand:V8SI 0 "register_operand" "=x")
2327
        (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2328
                     UNSPEC_FIX_NOTRUNC))]
2329
  "TARGET_AVX"
2330
  "vcvtps2dq\t{%1, %0|%0, %1}"
2331
  [(set_attr "type" "ssecvt")
2332
   (set_attr "prefix" "vex")
2333
   (set_attr "mode" "OI")])
2334
 
2335
;; 128-bit cvtps2dq: convert four SFmode elements (SSE register or
;; memory) to four SImode integers via UNSPEC_FIX_NOTRUNC.  The
;; prefix_data16 attribute is "1" for the non-AVX encoding and left at
;; its default ("*") when AVX is enabled.
(define_insn "sse2_cvtps2dq"
2336
  [(set (match_operand:V4SI 0 "register_operand" "=x")
2337
        (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2338
                     UNSPEC_FIX_NOTRUNC))]
2339
  "TARGET_SSE2"
2340
  "%vcvtps2dq\t{%1, %0|%0, %1}"
2341
  [(set_attr "type" "ssecvt")
2342
   (set (attr "prefix_data16")
2343
     (if_then_else
2344
       (match_test "TARGET_AVX")
2345
     (const_string "*")
2346
     (const_string "1")))
2347
   (set_attr "prefix" "maybe_vex")
2348
   (set_attr "mode" "TI")])
2349
 
2350
;; 256-bit vcvttps2dq: truncating ("fix") conversion of eight SFmode
;; elements (SSE register or memory) to eight SImode integers.
(define_insn "fix_truncv8sfv8si2"
2351
  [(set (match_operand:V8SI 0 "register_operand" "=x")
2352
        (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2353
  "TARGET_AVX"
2354
  "vcvttps2dq\t{%1, %0|%0, %1}"
2355
  [(set_attr "type" "ssecvt")
2356
   (set_attr "prefix" "vex")
2357
   (set_attr "mode" "OI")])
2358
 
2359
;; 128-bit cvttps2dq: truncating ("fix") conversion of four SFmode
;; elements (SSE register or memory) to four SImode integers.
;; prefix_rep is "1" for the non-AVX encoding and left at its default
;; ("*") when AVX is enabled.  prefix_data16 is given exactly once: the
;; pattern used to carry both a conditional setting and an unconditional
;; "0" for the same attribute; only the unconditional "0" is kept.
(define_insn "fix_truncv4sfv4si2"
2360
  [(set (match_operand:V4SI 0 "register_operand" "=x")
2361
        (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2362
  "TARGET_SSE2"
2363
  "%vcvttps2dq\t{%1, %0|%0, %1}"
2364
  [(set_attr "type" "ssecvt")
2365
   (set (attr "prefix_rep")
2366
     (if_then_else
2367
       (match_test "TARGET_AVX")
2368
     (const_string "*")
2369
     (const_string "1")))
2375
   (set_attr "prefix_data16" "0")
2376
   (set_attr "prefix" "maybe_vex")
2377
   (set_attr "mode" "TI")])
2378
 
2379
;; Expander for float vector -> unsigned integer vector conversion,
;; built from the signed truncating conversion:
;;   1. ix86_expand_adjust_ufix_to_sfix_si pre-processes op1 and also
;;      returns a second value through tmp[2];
;;   2. the signed fix_trunc pattern converts the adjusted input into a
;;      fresh pseudo tmp[1];
;;   3. tmp[1] is XORed with tmp[2] to form op0.
;; NOTE(review): presumably the helper shifts large inputs into signed
;; range and tmp[2] is the mask that restores the high bit -- verify
;; against the helper's definition.
;; NOTE(review): the pattern name, the mode of operand 0, the argument
;; of gen_reg_rtx and the gen_* function names look truncated -- confirm
;; against the upstream file.
(define_expand "fixuns_trunc2"
2380
  [(match_operand: 0 "register_operand" "")
2381
   (match_operand:VF1 1 "register_operand" "")]
2382
  "TARGET_SSE2"
2383
{
2384
  rtx tmp[3];
2385
  tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
2386
  tmp[1] = gen_reg_rtx (mode);
2387
  emit_insn (gen_fix_trunc2 (tmp[1], tmp[0]));
2388
  emit_insn (gen_xor3 (operands[0], tmp[1], tmp[2]));
2389
  DONE;
2390
})
2391
 
2392
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2393
;;
2394
;; Parallel double-precision floating point conversion operations
2395
;;
2396
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2397
 
2398
;; cvtpi2pd: convert two SImode integers to two DFmode values.
;; Alternative 0 takes the source from an MMX register (unit "mmx",
;; prefix_data16 "1"); alternative 1 takes it from memory and leaves
;; both attributes at their defaults.
(define_insn "sse2_cvtpi2pd"
2399
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2400
        (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2401
  "TARGET_SSE2"
2402
  "cvtpi2pd\t{%1, %0|%0, %1}"
2403
  [(set_attr "type" "ssecvt")
2404
   (set_attr "unit" "mmx,*")
2405
   (set_attr "prefix_data16" "1,*")
2406
   (set_attr "mode" "V2DF")])
2407
 
2408
;; cvtpd2pi: convert two DFmode values (SSE register or memory) to two
;; SImode integers in an MMX register.  The conversion is expressed with
;; UNSPEC_FIX_NOTRUNC rather than the truncating "fix" operator.
(define_insn "sse2_cvtpd2pi"
2409
  [(set (match_operand:V2SI 0 "register_operand" "=y")
2410
        (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2411
                     UNSPEC_FIX_NOTRUNC))]
2412
  "TARGET_SSE2"
2413
  "cvtpd2pi\t{%1, %0|%0, %1}"
2414
  [(set_attr "type" "ssecvt")
2415
   (set_attr "unit" "mmx")
2416
   (set_attr "bdver1_decode" "double")
2417
   (set_attr "prefix_data16" "1")
2418
   (set_attr "mode" "DI")])
2419
 
2420
;; cvttpd2pi: truncating ("fix") conversion of two DFmode values (SSE
;; register or memory) to two SImode integers in an MMX register.
(define_insn "sse2_cvttpd2pi"
2421
  [(set (match_operand:V2SI 0 "register_operand" "=y")
2422
        (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2423
  "TARGET_SSE2"
2424
  "cvttpd2pi\t{%1, %0|%0, %1}"
2425
  [(set_attr "type" "ssecvt")
2426
   (set_attr "unit" "mmx")
2427
   (set_attr "bdver1_decode" "double")
2428
   (set_attr "prefix_data16" "1")
2429
   (set_attr "mode" "TI")])
2430
 
2431
;; cvtsi2sd: convert the SImode integer op2 to DFmode and insert it into
;; element 0 of op0 (vec_merge mask 1); element 1 comes from op1.
;; Alternatives 0 and 1 are the non-AVX two-operand forms (op1 tied to
;; op0; op2 in a general register or in memory respectively);
;; alternative 2 is the AVX three-operand form.
(define_insn "sse2_cvtsi2sd"
2432
  [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2433
        (vec_merge:V2DF
2434
          (vec_duplicate:V2DF
2435
            (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2436
          (match_operand:V2DF 1 "register_operand" "0,0,x")
2437
          (const_int 1)))]
2438
  "TARGET_SSE2"
2439
  "@
2440
   cvtsi2sd\t{%2, %0|%0, %2}
2441
   cvtsi2sd\t{%2, %0|%0, %2}
2442
   vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2443
  [(set_attr "isa" "noavx,noavx,avx")
2444
   (set_attr "type" "sseicvt")
2445
   (set_attr "athlon_decode" "double,direct,*")
2446
   (set_attr "amdfam10_decode" "vector,double,*")
2447
   (set_attr "bdver1_decode" "double,direct,*")
2448
   (set_attr "prefix" "orig,orig,vex")
2449
   (set_attr "mode" "DF")])
2450
 
2451
;; cvtsi2sdq: 64-bit variant -- convert the DImode integer op2 to DFmode
;; and insert it into element 0 of op0; element 1 comes from op1.  Only
;; available on 64-bit targets.  Alternatives 0 and 1 are the non-AVX
;; forms (op1 tied to op0), alternative 2 is the AVX three-operand form.
(define_insn "sse2_cvtsi2sdq"
2452
  [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2453
        (vec_merge:V2DF
2454
          (vec_duplicate:V2DF
2455
            (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2456
          (match_operand:V2DF 1 "register_operand" "0,0,x")
2457
          (const_int 1)))]
2458
  "TARGET_SSE2 && TARGET_64BIT"
2459
  "@
2460
   cvtsi2sdq\t{%2, %0|%0, %2}
2461
   cvtsi2sdq\t{%2, %0|%0, %2}
2462
   vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2463
  [(set_attr "isa" "noavx,noavx,avx")
2464
   (set_attr "type" "sseicvt")
2465
   (set_attr "athlon_decode" "double,direct,*")
2466
   (set_attr "amdfam10_decode" "vector,double,*")
2467
   (set_attr "bdver1_decode" "double,direct,*")
2468
   (set_attr "length_vex" "*,*,4")
2469
   (set_attr "prefix_rex" "1,1,*")
2470
   (set_attr "prefix" "orig,orig,vex")
2471
   (set_attr "mode" "DF")])
2472
 
2473
;; cvtsd2si: convert element 0 of the V2DF operand (SSE register or
;; memory) to an SImode integer in a general register.  The conversion
;; is expressed with UNSPEC_FIX_NOTRUNC rather than the truncating
;; "fix" operator.
(define_insn "sse2_cvtsd2si"
2474
  [(set (match_operand:SI 0 "register_operand" "=r,r")
2475
        (unspec:SI
2476
          [(vec_select:DF
2477
             (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2478
             (parallel [(const_int 0)]))]
2479
          UNSPEC_FIX_NOTRUNC))]
2480
  "TARGET_SSE2"
2481
  "%vcvtsd2si\t{%1, %0|%0, %1}"
2482
  [(set_attr "type" "sseicvt")
2483
   (set_attr "athlon_decode" "double,vector")
2484
   (set_attr "bdver1_decode" "double,double")
2485
   (set_attr "prefix_rep" "1")
2486
   (set_attr "prefix" "maybe_vex")
2487
   (set_attr "mode" "SI")])
2488
 
2489
;; Variant of cvtsd2si whose input is a plain DFmode scalar (SSE
;; register or memory) instead of element 0 of a vector; the result is
;; an SImode integer via UNSPEC_FIX_NOTRUNC.
(define_insn "sse2_cvtsd2si_2"
2490
  [(set (match_operand:SI 0 "register_operand" "=r,r")
2491
        (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2492
                   UNSPEC_FIX_NOTRUNC))]
2493
  "TARGET_SSE2"
2494
  "%vcvtsd2si\t{%1, %0|%0, %1}"
2495
  [(set_attr "type" "sseicvt")
2496
   (set_attr "athlon_decode" "double,vector")
2497
   (set_attr "amdfam10_decode" "double,double")
2498
   (set_attr "bdver1_decode" "double,double")
2499
   (set_attr "prefix_rep" "1")
2500
   (set_attr "prefix" "maybe_vex")
2501
   (set_attr "mode" "SI")])
2502
 
2503
;; cvtsd2si with a 64-bit result: convert element 0 of the V2DF operand
;; to a DImode integer via UNSPEC_FIX_NOTRUNC.  Only available on 64-bit
;; targets.
(define_insn "sse2_cvtsd2siq"
2504
  [(set (match_operand:DI 0 "register_operand" "=r,r")
2505
        (unspec:DI
2506
          [(vec_select:DF
2507
             (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2508
             (parallel [(const_int 0)]))]
2509
          UNSPEC_FIX_NOTRUNC))]
2510
  "TARGET_SSE2 && TARGET_64BIT"
2511
  "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2512
  [(set_attr "type" "sseicvt")
2513
   (set_attr "athlon_decode" "double,vector")
2514
   (set_attr "bdver1_decode" "double,double")
2515
   (set_attr "prefix_rep" "1")
2516
   (set_attr "prefix" "maybe_vex")
2517
   (set_attr "mode" "DI")])
2518
 
2519
;; Variant of the 64-bit cvtsd2si whose input is a plain DFmode scalar
;; (SSE register or memory); the result is a DImode integer via
;; UNSPEC_FIX_NOTRUNC.  Only available on 64-bit targets.
(define_insn "sse2_cvtsd2siq_2"
2520
  [(set (match_operand:DI 0 "register_operand" "=r,r")
2521
        (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2522
                   UNSPEC_FIX_NOTRUNC))]
2523
  "TARGET_SSE2 && TARGET_64BIT"
2524
  "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2525
  [(set_attr "type" "sseicvt")
2526
   (set_attr "athlon_decode" "double,vector")
2527
   (set_attr "amdfam10_decode" "double,double")
2528
   (set_attr "bdver1_decode" "double,double")
2529
   (set_attr "prefix_rep" "1")
2530
   (set_attr "prefix" "maybe_vex")
2531
   (set_attr "mode" "DI")])
2532
 
2533
;; cvttsd2si: truncating ("fix") conversion of element 0 of the V2DF
;; operand (SSE register or memory) to an SImode integer in a general
;; register.
(define_insn "sse2_cvttsd2si"
2534
  [(set (match_operand:SI 0 "register_operand" "=r,r")
2535
        (fix:SI
2536
          (vec_select:DF
2537
            (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2538
            (parallel [(const_int 0)]))))]
2539
  "TARGET_SSE2"
2540
  "%vcvttsd2si\t{%1, %0|%0, %1}"
2541
  [(set_attr "type" "sseicvt")
2542
   (set_attr "athlon_decode" "double,vector")
2543
   (set_attr "amdfam10_decode" "double,double")
2544
   (set_attr "bdver1_decode" "double,double")
2545
   (set_attr "prefix_rep" "1")
2546
   (set_attr "prefix" "maybe_vex")
2547
   (set_attr "mode" "SI")])
2548
 
2549
;; cvttsd2si with a 64-bit result: truncating ("fix") conversion of
;; element 0 of the V2DF operand to a DImode integer.  Only available on
;; 64-bit targets.
(define_insn "sse2_cvttsd2siq"
2550
  [(set (match_operand:DI 0 "register_operand" "=r,r")
2551
        (fix:DI
2552
          (vec_select:DF
2553
            (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2554
            (parallel [(const_int 0)]))))]
2555
  "TARGET_SSE2 && TARGET_64BIT"
2556
  "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2557
  [(set_attr "type" "sseicvt")
2558
   (set_attr "athlon_decode" "double,vector")
2559
   (set_attr "amdfam10_decode" "double,double")
2560
   (set_attr "bdver1_decode" "double,double")
2561
   (set_attr "prefix_rep" "1")
2562
   (set_attr "prefix" "maybe_vex")
2563
   (set_attr "mode" "DI")])
2564
 
2565
;; vcvtdq2pd (AVX): convert four SImode integers (SSE register or
;; memory) to four DFmode values.
(define_insn "floatv4siv4df2"
2566
  [(set (match_operand:V4DF 0 "register_operand" "=x")
2567
        (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2568
  "TARGET_AVX"
2569
  "vcvtdq2pd\t{%1, %0|%0, %1}"
2570
  [(set_attr "type" "ssecvt")
2571
   (set_attr "prefix" "vex")
2572
   (set_attr "mode" "V4DF")])
2573
 
2574
;; vcvtdq2pd (AVX) on the low half of a 256-bit integer vector: elements
;; 0..3 of the V8SI operand are selected and converted to four DFmode
;; values.  The template prints operand 1 with the "x" modifier.
(define_insn "avx_cvtdq2pd256_2"
2575
  [(set (match_operand:V4DF 0 "register_operand" "=x")
2576
        (float:V4DF
2577
          (vec_select:V4SI
2578
            (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2579
            (parallel [(const_int 0) (const_int 1)
2580
                       (const_int 2) (const_int 3)]))))]
2581
  "TARGET_AVX"
2582
  "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2583
  [(set_attr "type" "ssecvt")
2584
   (set_attr "prefix" "vex")
2585
   (set_attr "mode" "V4DF")])
2586
 
2587
;; Convert the low two SImode elements (indices 0 and 1) of a V4SI
;; operand to V2DF.
(define_insn "sse2_cvtdq2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (float:V2DF
          (vec_select:V2SI
            (match_operand:V4SI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "%vcvtdq2pd\t{%1, %0|%0, %q1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V2DF")])
2598
 
2599
;; V4DF to V4SI conversion expressed as UNSPEC_FIX_NOTRUNC (i.e. not the
;; truncating `fix' RTL code), emitted as vcvtpd2dq with the {y} suffix.
(define_insn "avx_cvtpd2dq256"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
                     UNSPEC_FIX_NOTRUNC))]
  "TARGET_AVX"
  "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
2608
 
2609
;; Expander producing a V8SI value whose low half is the
;; UNSPEC_FIX_NOTRUNC conversion of a V4DF operand and whose high half is
;; zero.  Operand 2 is synthesized here as the V4SI zero constant.
(define_expand "avx_cvtpd2dq256_2"
  [(set (match_operand:V8SI 0 "register_operand" "")
        (vec_concat:V8SI
          (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "")]
                       UNSPEC_FIX_NOTRUNC)
          (match_dup 2)))]
  "TARGET_AVX"
  "operands[2] = CONST0_RTX (V4SImode);")
2617
 
2618
;; Insn matching the RTL built by the avx_cvtpd2dq256_2 expander: the
;; converted V4SI value concatenated with a zero V4SI (const0_operand).
;; The destination is printed with %x in the template.
(define_insn "*avx_cvtpd2dq256_2"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (vec_concat:V8SI
          (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
                       UNSPEC_FIX_NOTRUNC)
          (match_operand:V4SI 2 "const0_operand" "")))]
  "TARGET_AVX"
  "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
2629
 
2630
;; Expander producing a V4SI value whose low two elements are the
;; UNSPEC_FIX_NOTRUNC conversion of a V2DF operand and whose high two
;; elements are zero.  Operand 2 is synthesized as the V2SI zero constant.
(define_expand "sse2_cvtpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (vec_concat:V4SI
          (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
                       UNSPEC_FIX_NOTRUNC)
          (match_dup 2)))]
  "TARGET_SSE2"
  "operands[2] = CONST0_RTX (V2SImode);")
2638
 
2639
;; Insn matching the RTL built by the sse2_cvtpd2dq expander.  The
;; output template is chosen in C: the VEX mnemonic with an explicit {x}
;; suffix when TARGET_AVX, the plain SSE2 mnemonic otherwise.
(define_insn "*sse2_cvtpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
                       UNSPEC_FIX_NOTRUNC)
          (match_operand:V2SI 2 "const0_operand" "")))]
  "TARGET_SSE2"
{
  if (TARGET_AVX)
    return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
  else
    return "cvtpd2dq\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")
   (set_attr "amdfam10_decode" "double")
   (set_attr "athlon_decode" "vector")
   (set_attr "bdver1_decode" "double")])
2660
 
2661
;; Truncating (RTL `fix') conversion of a V4DF vector to V4SI, emitted as
;; vcvttpd2dq with the {y} suffix.
(define_insn "fix_truncv4dfv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
2669
 
2670
;; Expander producing a V8SI value: low half is the truncating conversion
;; of a V4DF operand, high half is zero (operand 2, synthesized here).
(define_expand "avx_cvttpd2dq256_2"
  [(set (match_operand:V8SI 0 "register_operand" "")
        (vec_concat:V8SI
          (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" ""))
          (match_dup 2)))]
  "TARGET_AVX"
  "operands[2] = CONST0_RTX (V4SImode);")
2677
 
2678
;; Insn matching the RTL built by the avx_cvttpd2dq256_2 expander:
;; truncated V4SI result concatenated with a zero V4SI.
(define_insn "*avx_cvttpd2dq256_2"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (vec_concat:V8SI
          (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
          (match_operand:V4SI 2 "const0_operand" "")))]
  "TARGET_AVX"
  "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
2688
 
2689
;; Expander producing a V4SI value: low two elements are the truncating
;; conversion of a V2DF operand, high two elements are zero (operand 2,
;; synthesized here as the V2SI zero constant).
(define_expand "sse2_cvttpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (vec_concat:V4SI
          (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
          (match_dup 2)))]
  "TARGET_SSE2"
  "operands[2] = CONST0_RTX (V2SImode);")
2696
 
2697
;; Insn matching the RTL built by the sse2_cvttpd2dq expander.  The
;; template is chosen in C: VEX mnemonic with {x} suffix when TARGET_AVX,
;; plain SSE2 mnemonic otherwise.
(define_insn "*sse2_cvttpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
          (match_operand:V2SI 2 "const0_operand" "")))]
  "TARGET_SSE2"
{
  if (TARGET_AVX)
    return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
  else
    return "cvttpd2dq\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssecvt")
   (set_attr "amdfam10_decode" "double")
   (set_attr "athlon_decode" "vector")
   (set_attr "bdver1_decode" "double")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
2715
 
2716
;; Merge a double->single conversion of operand 2 into element 0 of
;; operand 1 (vec_merge mask 1); the remaining elements come from
;; operand 1.  Alternatives 0/1 are the two-operand SSE2 forms (operand 1
;; tied to the destination, source in register or memory); alternative 2
;; is the three-operand AVX form.
(define_insn "sse2_cvtsd2ss"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float_truncate:V2SF
              (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
          (match_operand:V4SF 1 "register_operand" "0,0,x")
          (const_int 1)))]
  "TARGET_SSE2"
  "@
   cvtsd2ss\t{%2, %0|%0, %2}
   cvtsd2ss\t{%2, %0|%0, %2}
   vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssecvt")
   (set_attr "athlon_decode" "vector,double,*")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "bdver1_decode" "direct,direct,*")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "SF")])
2736
 
2737
;; Merge a single->double conversion of the low elements of operand 2
;; into element 0 of operand 1 (vec_merge mask 1); the other element
;; comes from operand 1.  Alternatives 0/1 are the two-operand SSE2
;; forms; alternative 2 is the three-operand AVX form.
(define_insn "sse2_cvtss2sd"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
        (vec_merge:V2DF
          (float_extend:V2DF
            (vec_select:V2SF
              (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
              (parallel [(const_int 0) (const_int 1)])))
          (match_operand:V2DF 1 "register_operand" "0,0,x")
          (const_int 1)))]
  "TARGET_SSE2"
  "@
   cvtss2sd\t{%2, %0|%0, %2}
   cvtss2sd\t{%2, %0|%0, %2}
   vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssecvt")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "athlon_decode" "direct,direct,*")
   (set_attr "bdver1_decode" "direct,direct,*")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "DF")])
2758
 
2759
;; Narrow a V4DF vector to V4SF (float_truncate), emitted as vcvtpd2ps
;; with the {y} suffix.
(define_insn "avx_cvtpd2ps256"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (float_truncate:V4SF
          (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF")])
2768
 
2769
;; Expander producing a V4SF value: low two elements are the
;; float_truncate of a V2DF operand, high two elements are zero
;; (operand 2, synthesized here as the V2SF zero constant).
(define_expand "sse2_cvtpd2ps"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (vec_concat:V4SF
          (float_truncate:V2SF
            (match_operand:V2DF 1 "nonimmediate_operand" ""))
          (match_dup 2)))]
  "TARGET_SSE2"
  "operands[2] = CONST0_RTX (V2SFmode);")
2777
 
2778
;; Insn matching the RTL built by the sse2_cvtpd2ps expander.  The
;; template is chosen in C: VEX mnemonic with {x} suffix when TARGET_AVX,
;; plain SSE2 mnemonic otherwise.
(define_insn "*sse2_cvtpd2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_concat:V4SF
          (float_truncate:V2SF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
          (match_operand:V2SF 2 "const0_operand" "")))]
  "TARGET_SSE2"
{
  if (TARGET_AVX)
    return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
  else
    return "cvtpd2ps\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssecvt")
   (set_attr "amdfam10_decode" "double")
   (set_attr "athlon_decode" "vector")
   (set_attr "bdver1_decode" "double")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V4SF")])
2798
 
2799
;; Widen a whole V4SF vector to V4DF (float_extend), AVX only.
(define_insn "avx_cvtps2pd256"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (float_extend:V4DF
          (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vcvtps2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])
2808
 
2809
;; Widen the low four SFmode elements (indices 0..3) of a V8SF operand
;; to V4DF.  The source is printed with the %x modifier in the template.
(define_insn "*avx_cvtps2pd256_2"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (float_extend:V4DF
          (vec_select:V4SF
            (match_operand:V8SF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX"
  "vcvtps2pd\t{%x1, %0|%0, %x1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])
2821
 
2822
;; Widen the low two SFmode elements (indices 0 and 1) of a V4SF operand
;; to V2DF.
(define_insn "sse2_cvtps2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (float_extend:V2DF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "%vcvtps2pd\t{%1, %0|%0, %q1}"
  [(set_attr "type" "ssecvt")
   (set_attr "amdfam10_decode" "direct")
   (set_attr "athlon_decode" "double")
   (set_attr "bdver1_decode" "double")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V2DF")])
2837
 
2838
;; Widen the high two SFmode elements of operand 1 to V2DF in two steps:
;;  1. a fresh V4SF temporary (operand 2) receives elements 6,7,2,3 of
;;     (vec_concat temp operand1), i.e. operand 1's elements 2 and 3 land
;;     in positions 0 and 1;
;;  2. elements 0 and 1 of that temporary are float_extended to V2DF.
(define_expand "vec_unpacks_hi_v4sf"
  [(set (match_dup 2)
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_dup 2)
            (match_operand:V4SF 1 "nonimmediate_operand" ""))
          (parallel [(const_int 6) (const_int 7)
                     (const_int 2) (const_int 3)])))
   (set (match_operand:V2DF 0 "register_operand" "")
        (float_extend:V2DF
          (vec_select:V2SF
            (match_dup 2)
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "operands[2] = gen_reg_rtx (V4SFmode);")
2853
 
2854
;; Widen the high four SFmode elements (indices 4..7) of a V8SF operand
;; to V4DF: extract them into a fresh V4SF temporary (operand 2), then
;; float_extend the temporary.
(define_expand "vec_unpacks_hi_v8sf"
  [(set (match_dup 2)
        (vec_select:V4SF
          (match_operand:V8SF 1 "nonimmediate_operand" "")
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_operand:V4DF 0 "register_operand" "")
        (float_extend:V4DF
          (match_dup 2)))]
  "TARGET_AVX"
  "operands[2] = gen_reg_rtx (V4SFmode);")
2865
 
2866
;; Widen the low two SFmode elements of a V4SF operand to V2DF.  The RTL
;; generated here has the same shape as the sse2_cvtps2pd insn pattern.
(define_expand "vec_unpacks_lo_v4sf"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (float_extend:V2DF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2")
2873
 
2874
;; Widen the low four SFmode elements of a V8SF operand to V4DF.  The RTL
;; generated here has the same shape as the *avx_cvtps2pd256_2 insn
;; pattern.
(define_expand "vec_unpacks_lo_v8sf"
  [(set (match_operand:V4DF 0 "register_operand" "")
        (float_extend:V4DF
          (vec_select:V4SF
            (match_operand:V8SF 1 "nonimmediate_operand" "")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX")
2882
 
2883
;; Maps an integer vector mode to the floating-point vector mode with
;; half as many elements, each twice as wide.
(define_mode_attr sseunpackfltmode
  [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF") (V8SI "V4DF")])
2885
 
2886
;; Sign-extending unpack of the high half of a HImode-element vector
;; followed by an integer->float conversion.  Operand 0 has the
;; sseunpackfltmode of the input mode.
;; NOTE(review): the <mode>, <sseunpackfltmode> and <sseunpackmode>
;; substitutions were missing from this copy and have been restored;
;; sseunpackmode is expected to be defined elsewhere in this file --
;; confirm.
(define_expand "vec_unpacks_float_hi_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  /* Widened integer temporary.  */
  rtx tmp = gen_reg_rtx (<sseunpackmode>mode);

  emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
  /* operands[0] = (float) tmp.  */
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
  DONE;
})
2898
 
2899
;; Sign-extending unpack of the low half of a HImode-element vector
;; followed by an integer->float conversion.  Operand 0 has the
;; sseunpackfltmode of the input mode.
;; NOTE(review): the <mode>, <sseunpackfltmode> and <sseunpackmode>
;; substitutions were missing from this copy and have been restored;
;; sseunpackmode is expected to be defined elsewhere in this file --
;; confirm.
(define_expand "vec_unpacks_float_lo_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  /* Widened integer temporary.  */
  rtx tmp = gen_reg_rtx (<sseunpackmode>mode);

  emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
  /* operands[0] = (float) tmp.  */
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
  DONE;
})
2911
 
2912
;; Zero-extending unpack of the high half of a HImode-element vector
;; followed by an integer->float conversion.  A signed FLOAT is used on
;; the widened temporary.
;; NOTE(review): the <mode>, <sseunpackfltmode> and <sseunpackmode>
;; substitutions were missing from this copy and have been restored;
;; sseunpackmode is expected to be defined elsewhere in this file --
;; confirm.
(define_expand "vec_unpacku_float_hi_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  /* Widened integer temporary.  */
  rtx tmp = gen_reg_rtx (<sseunpackmode>mode);

  emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
  /* operands[0] = (float) tmp.  */
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
  DONE;
})
2924
 
2925
;; Zero-extending unpack of the low half of a HImode-element vector
;; followed by an integer->float conversion.  A signed FLOAT is used on
;; the widened temporary.
;; NOTE(review): the <mode>, <sseunpackfltmode> and <sseunpackmode>
;; substitutions were missing from this copy and have been restored;
;; sseunpackmode is expected to be defined elsewhere in this file --
;; confirm.
(define_expand "vec_unpacku_float_lo_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  /* Widened integer temporary.  */
  rtx tmp = gen_reg_rtx (<sseunpackmode>mode);

  emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
  /* operands[0] = (float) tmp.  */
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
  DONE;
})
2937
 
2938
;; Convert the high two SImode elements of a V4SI operand to V2DF:
;;  1. a fresh V4SI temporary (operand 2) receives elements 2,3,2,3 of
;;     operand 1, so the high pair lands in positions 0 and 1;
;;  2. elements 0 and 1 of the temporary are converted with `float'.
(define_expand "vec_unpacks_float_hi_v4si"
  [(set (match_dup 2)
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand" "")
          (parallel [(const_int 2) (const_int 3)
                     (const_int 2) (const_int 3)])))
   (set (match_operand:V2DF 0 "register_operand" "")
        (float:V2DF
          (vec_select:V2SI
            (match_dup 2)
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "operands[2] = gen_reg_rtx (V4SImode);")
2951
 
2952
;; Convert the low two SImode elements of a V4SI operand to V2DF.  The
;; RTL generated here has the same shape as the sse2_cvtdq2pd insn
;; pattern.
(define_expand "vec_unpacks_float_lo_v4si"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (float:V2DF
          (vec_select:V2SI
            (match_operand:V4SI 1 "nonimmediate_operand" "")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2")
2959
 
2960
;; Convert the high four SImode elements (indices 4..7) of a V8SI operand
;; to V4DF: extract them into a fresh V4SI temporary (operand 2), then
;; convert the temporary with `float'.
(define_expand "vec_unpacks_float_hi_v8si"
  [(set (match_dup 2)
        (vec_select:V4SI
          (match_operand:V8SI 1 "nonimmediate_operand" "")
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_operand:V4DF 0 "register_operand" "")
        (float:V4DF
          (match_dup 2)))]
  "TARGET_AVX"
  "operands[2] = gen_reg_rtx (V4SImode);")
2971
 
2972
;; Convert the low four SImode elements of a V8SI operand to V4DF.  The
;; RTL generated here has the same shape as the avx_cvtdq2pd256_2 insn
;; pattern.
(define_expand "vec_unpacks_float_lo_v8si"
  [(set (match_operand:V4DF 0 "register_operand" "")
        (float:V4DF
          (vec_select:V4SI
            (match_operand:V8SI 1 "nonimmediate_operand" "")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX")
2980
 
2981
;; Unsigned conversion of the high two SImode elements of a V4SI operand
;; to V2DF.  The sequence is:
;;   op5 = elements 2,3,2,3 of operand 1
;;   op6 = signed float of op5's elements 0,1
;;   op7 = (op6 < 0) mask          (operand 3 is the zero vector)
;;   op8 = op7 & 2**32             (operand 4 is a vector of 2**32)
;;   op0 = op6 + op8
;; i.e. lanes that came out negative from the signed conversion are
;; corrected by adding 2**32.
(define_expand "vec_unpacku_float_hi_v4si"
  [(set (match_dup 5)
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand" "")
          (parallel [(const_int 2) (const_int 3)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 6)
        (float:V2DF
          (vec_select:V2SI
            (match_dup 5)
            (parallel [(const_int 0) (const_int 1)]))))
   (set (match_dup 7)
        (lt:V2DF (match_dup 6) (match_dup 3)))
   (set (match_dup 8)
        (and:V2DF (match_dup 7) (match_dup 4)))
   (set (match_operand:V2DF 0 "register_operand" "")
        (plus:V2DF (match_dup 6) (match_dup 8)))]
  "TARGET_SSE2"
{
  REAL_VALUE_TYPE TWO32r;
  rtx x;
  int i;

  /* x = 1.0 scaled by 2**32, as a DFmode constant.  */
  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);

  /* Operand 3: zero vector; operand 4: vector built from x.  */
  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
  operands[4] = force_reg (V2DFmode,
                           ix86_build_const_vector (V2DFmode, 1, x));

  /* Operand 5: shuffled integer temporary.  */
  operands[5] = gen_reg_rtx (V4SImode);

  /* Operands 6..8: V2DF temporaries.  */
  for (i = 6; i < 9; i++)
    operands[i] = gen_reg_rtx (V2DFmode);
})
3016
 
3017
;; Unsigned conversion of the low two SImode elements of a V4SI operand
;; to V2DF.  The sequence is:
;;   op5 = signed float of operand 1's elements 0,1
;;   op6 = (op5 < 0) mask          (operand 3 is the zero vector)
;;   op7 = op6 & 2**32             (operand 4 is a vector of 2**32)
;;   op0 = op5 + op7
;; i.e. lanes that came out negative from the signed conversion are
;; corrected by adding 2**32.
(define_expand "vec_unpacku_float_lo_v4si"
  [(set (match_dup 5)
        (float:V2DF
          (vec_select:V2SI
            (match_operand:V4SI 1 "nonimmediate_operand" "")
            (parallel [(const_int 0) (const_int 1)]))))
   (set (match_dup 6)
        (lt:V2DF (match_dup 5) (match_dup 3)))
   (set (match_dup 7)
        (and:V2DF (match_dup 6) (match_dup 4)))
   (set (match_operand:V2DF 0 "register_operand" "")
        (plus:V2DF (match_dup 5) (match_dup 7)))]
  "TARGET_SSE2"
{
  REAL_VALUE_TYPE TWO32r;
  rtx x;
  int i;

  /* x = 1.0 scaled by 2**32, as a DFmode constant.  */
  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);

  /* Operand 3: zero vector; operand 4: vector built from x.  */
  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
  operands[4] = force_reg (V2DFmode,
                           ix86_build_const_vector (V2DFmode, 1, x));

  /* Operands 5..7: V2DF temporaries.  */
  for (i = 5; i < 8; i++)
    operands[i] = gen_reg_rtx (V2DFmode);
})
3045
 
3046
;; Unsigned conversion of the high half of a V8SI operand to V4DF,
;; emitted entirely from C.  The high V4SI half is extracted, converted
;; as signed, and lanes that came out negative get 2**32 added.
(define_expand "vec_unpacku_float_hi_v8si"
  [(match_operand:V4DF 0 "register_operand" "")
   (match_operand:V8SI 1 "register_operand" "")]
  "TARGET_AVX"
{
  REAL_VALUE_TYPE TWO32r;
  rtx x, tmp[6];
  int i;

  /* x = 1.0 scaled by 2**32, as a DFmode constant.  */
  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);

  /* tmp[0]: zero vector; tmp[1]: vector built from x;
     tmp[5]: extracted integer half; tmp[2..4]: V4DF temporaries.  */
  tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
  tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
  tmp[5] = gen_reg_rtx (V4SImode);

  for (i = 2; i < 5; i++)
    tmp[i] = gen_reg_rtx (V4DFmode);
  emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
  /* tmp[2] = signed conversion.  */
  emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
  /* tmp[3] = (tmp[2] < 0) mask.  */
  emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
                          gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
  /* tmp[4] = mask & 2**32; result = tmp[2] + tmp[4].  */
  emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
  emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
  DONE;
})
3072
 
3073
;; Unsigned conversion of the low half of a V8SI operand to V4DF,
;; emitted entirely from C.  The low four elements are converted as
;; signed (avx_cvtdq2pd256_2), and lanes that came out negative get
;; 2**32 added.
(define_expand "vec_unpacku_float_lo_v8si"
  [(match_operand:V4DF 0 "register_operand" "")
   (match_operand:V8SI 1 "nonimmediate_operand" "")]
  "TARGET_AVX"
{
  REAL_VALUE_TYPE TWO32r;
  rtx x, tmp[5];
  int i;

  /* x = 1.0 scaled by 2**32, as a DFmode constant.  */
  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);

  /* tmp[0]: zero vector; tmp[1]: vector built from x;
     tmp[2..4]: V4DF temporaries.  */
  tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
  tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));

  for (i = 2; i < 5; i++)
    tmp[i] = gen_reg_rtx (V4DFmode);
  /* tmp[2] = signed conversion of the low four elements.  */
  emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
  /* tmp[3] = (tmp[2] < 0) mask.  */
  emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
                          gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
  /* tmp[4] = mask & 2**32; result = tmp[2] + tmp[4].  */
  emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
  emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
  DONE;
})
3097
 
3098
;; Pack two V4DF operands into one V8SF: each input is narrowed with
;; float_truncate into its own V4SF temporary (operands 3 and 4), and
;; the temporaries are concatenated, operand 1's result in the low half.
(define_expand "vec_pack_trunc_v4df"
  [(set (match_dup 3)
        (float_truncate:V4SF
          (match_operand:V4DF 1 "nonimmediate_operand" "")))
   (set (match_dup 4)
        (float_truncate:V4SF
          (match_operand:V4DF 2 "nonimmediate_operand" "")))
   (set (match_operand:V8SF 0 "register_operand" "")
        (vec_concat:V8SF
          (match_dup 3)
          (match_dup 4)))]
  "TARGET_AVX"
{
  operands[3] = gen_reg_rtx (V4SFmode);
  operands[4] = gen_reg_rtx (V4SFmode);
})
3114
 
3115
;; Pack two V2DF operands into one V4SF.  Two strategies:
;;  - AVX without TARGET_PREFER_AVX128: concatenate the inputs into a
;;    V4DF and narrow it with a single avx_cvtpd2ps256;
;;  - otherwise: narrow each input separately with sse2_cvtpd2ps and
;;    combine the two low halves with sse_movlhps.
(define_expand "vec_pack_trunc_v2df"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V2DF 1 "nonimmediate_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")]
  "TARGET_SSE2"
{
  rtx tmp0, tmp1;

  if (TARGET_AVX && !TARGET_PREFER_AVX128)
    {
      tmp0 = gen_reg_rtx (V4DFmode);
      /* Operand 1 is forced into a register before the concat.  */
      tmp1 = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
      emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
    }
  else
    {
      tmp0 = gen_reg_rtx (V4SFmode);
      tmp1 = gen_reg_rtx (V4SFmode);

      emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
      emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
      emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
    }
  DONE;
})
3142
 
3143
;; Pack two V4DF operands into one V8SI using truncating conversion:
;; each input goes through fix_truncv4dfv4si2 into a V4SI temporary and
;; the temporaries are concatenated with avx_vec_concatv8si.
(define_expand "vec_pack_sfix_trunc_v4df"
  [(match_operand:V8SI 0 "register_operand" "")
   (match_operand:V4DF 1 "nonimmediate_operand" "")
   (match_operand:V4DF 2 "nonimmediate_operand" "")]
  "TARGET_AVX"
{
  rtx r1, r2;

  r1 = gen_reg_rtx (V4SImode);
  r2 = gen_reg_rtx (V4SImode);

  emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
  emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
  emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
  DONE;
})
3159
 
3160
;; Pack two V2DF operands into one V4SI using truncating conversion.
;;  - AVX without TARGET_PREFER_AVX128: concatenate the inputs into a
;;    V4DF and convert it with a single fix_truncv4dfv4si2;
;;  - otherwise: convert each input with sse2_cvttpd2dq (result in the
;;    low two elements, zeros above) and interleave the low 64-bit
;;    halves of the two results, viewing all three vectors as V2DI.
(define_expand "vec_pack_sfix_trunc_v2df"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V2DF 1 "nonimmediate_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")]
  "TARGET_SSE2"
{
  rtx tmp0, tmp1;

  if (TARGET_AVX && !TARGET_PREFER_AVX128)
    {
      tmp0 = gen_reg_rtx (V4DFmode);
      /* Operand 1 is forced into a register before the concat.  */
      tmp1 = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
    }
  else
    {
      tmp0 = gen_reg_rtx (V4SImode);
      tmp1 = gen_reg_rtx (V4SImode);

      emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
      emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
      emit_insn
       (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
                                    gen_lowpart (V2DImode, tmp0),
                                    gen_lowpart (V2DImode, tmp1)));
    }
  DONE;
})
3190
 
3191
;; Maps a DFmode-element vector mode to the SImode-element vector mode
;; with twice as many elements.
(define_mode_attr ssepackfltmode
  [(V4DF "V8SI") (V2DF "V4SI")])
3193
 
3194
;; Pack two VF2 operands into one integer vector using an unsigned
;; truncating conversion, built on top of the signed pack expander.
;; NOTE(review): the <mode> and <ssepackfltmode> substitutions were
;; missing from this copy and have been restored.
;; NOTE(review): ix86_expand_adjust_ufix_to_sfix_si is defined outside
;; this file; presumably it returns the input adjusted into signed range
;; and stores a per-lane correction through its second argument --
;; confirm against its definition.
(define_expand "vec_pack_ufix_trunc_<mode>"
  [(match_operand:<ssepackfltmode> 0 "register_operand" "")
   (match_operand:VF2 1 "register_operand" "")
   (match_operand:VF2 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx tmp[7];
  /* tmp[0]/tmp[1]: adjusted inputs; tmp[2]/tmp[3]: values written by
     the helper through its pointer argument.  */
  tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
  tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
  /* tmp[4]: signed truncating pack of the adjusted inputs.  */
  tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
  emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
  /* tmp[5]: even elements of tmp[2]/tmp[3], in the result mode.  */
  if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
    {
      tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
      ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
    }
  else
    {
      /* V8SI result without AVX2: do the extraction in V8SFmode and
         reinterpret the result as V8SI.  */
      tmp[5] = gen_reg_rtx (V8SFmode);
      ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
                                        gen_lowpart (V8SFmode, tmp[3]), 0);
      tmp[5] = gen_lowpart (V8SImode, tmp[5]);
    }
  /* Result = tmp[4] XOR tmp[5]; copy if the binop used another reg.  */
  tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
                                operands[0], 0, OPTAB_DIRECT);
  if (tmp[6] != operands[0])
    emit_move_insn (operands[0], tmp[6]);
  DONE;
})
3223
 
3224
;; Pack two V4DF operands into one V8SI using the non-truncating
;; conversion: each input goes through avx_cvtpd2dq256 into a V4SI
;; temporary and the temporaries are concatenated.
(define_expand "vec_pack_sfix_v4df"
  [(match_operand:V8SI 0 "register_operand" "")
   (match_operand:V4DF 1 "nonimmediate_operand" "")
   (match_operand:V4DF 2 "nonimmediate_operand" "")]
  "TARGET_AVX"
{
  rtx r1, r2;

  r1 = gen_reg_rtx (V4SImode);
  r2 = gen_reg_rtx (V4SImode);

  emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
  emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
  emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
  DONE;
})
3240
 
3241
;; Pack two V2DF operands into one V4SI using the non-truncating
;; conversion.
;;  - AVX without TARGET_PREFER_AVX128: concatenate the inputs into a
;;    V4DF and convert it with a single avx_cvtpd2dq256;
;;  - otherwise: convert each input with sse2_cvtpd2dq (result in the low
;;    two elements, zeros above) and interleave the low 64-bit halves of
;;    the two results, viewing all three vectors as V2DI.
(define_expand "vec_pack_sfix_v2df"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V2DF 1 "nonimmediate_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")]
  "TARGET_SSE2"
{
  rtx tmp0, tmp1;

  if (TARGET_AVX && !TARGET_PREFER_AVX128)
    {
      tmp0 = gen_reg_rtx (V4DFmode);
      /* Operand 1 is forced into a register before the concat.  */
      tmp1 = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
      emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
    }
  else
    {
      tmp0 = gen_reg_rtx (V4SImode);
      tmp1 = gen_reg_rtx (V4SImode);

      emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
      emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
      emit_insn
       (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
                                    gen_lowpart (V2DImode, tmp0),
                                    gen_lowpart (V2DImode, tmp1)));
    }
  DONE;
})
3271
 
3272
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3273
;;
3274
;; Parallel single-precision floating point element swizzling
3275
;;
3276
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3277
 
3278
;; Expander wrapper around the sse_movhlps insn that accepts any
;; nonimmediate operands.  ix86_fixup_binary_operands legitimizes the
;; operands and returns the destination to use; if that is not
;; operands[0], the result is copied there afterwards.
(define_expand "sse_movhlps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" "")
            (match_operand:V4SF 2 "nonimmediate_operand" ""))
          (parallel [(const_int 6)
                     (const_int 7)
                     (const_int 2)
                     (const_int 3)])))]
  "TARGET_SSE"
{
  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  DONE;
})
3300
 
3301
;; Result = { op2[2], op2[3], op1[2], op1[3] } (indices 6,7,2,3 of the
;; concatenation of operands 1 and 2).  Alternatives:
;;   0/1  register-register movhlps / vmovhlps;
;;   2/3  operand 2 in offsettable memory: load its high half (%H2) into
;;        the low half of the result with movlps / vmovlps;
;;   4    destination in memory, tied to operand 1: store with movhps.
;; The insn condition rejects operands 1 and 2 both being memory.
(define_insn "sse_movhlps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,x,x,m")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
            (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
          (parallel [(const_int 6)
                     (const_int 7)
                     (const_int 2)
                     (const_int 3)])))]
  "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movhlps\t{%2, %0|%0, %2}
   vmovhlps\t{%2, %1, %0|%0, %1, %2}
   movlps\t{%H2, %0|%0, %H2}
   vmovlps\t{%H2, %1, %0|%0, %1, %H2}
   %vmovhps\t{%2, %0|%0, %2}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3322
 
3323
;; Expander wrapper around the sse_movlhps insn that accepts any
;; nonimmediate operands.  ix86_fixup_binary_operands legitimizes the
;; operands and returns the destination to use; if that is not
;; operands[0], the result is copied there afterwards.
(define_expand "sse_movlhps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" "")
            (match_operand:V4SF 2 "nonimmediate_operand" ""))
          (parallel [(const_int 0)
                     (const_int 1)
                     (const_int 4)
                     (const_int 5)])))]
  "TARGET_SSE"
{
  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  DONE;
})
3345
 
3346
;; Result = { op1[0], op1[1], op2[0], op2[1] } (indices 0,1,4,5 of the
;; concatenation of operands 1 and 2).  Alternatives:
;;   0/1  register-register movlhps / vmovlhps;
;;   2/3  operand 2 in memory: load it into the high half of the result
;;        with movhps / vmovhps.  These instructions only accept a
;;        memory source, so operand 2 is constrained to "m" in both
;;        alternatives (the register-register AVX case is already
;;        covered by alternative 1);
;;   4    destination in offsettable memory, tied to operand 1: store
;;        operand 2 into the high half (%H0) with movlps.
(define_insn "sse_movlhps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,x,x,o")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
            (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
          (parallel [(const_int 0)
                     (const_int 1)
                     (const_int 4)
                     (const_int 5)])))]
  "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
  "@
   movlhps\t{%2, %0|%0, %2}
   vmovlhps\t{%2, %1, %0|%0, %1, %2}
   movhps\t{%2, %0|%0, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}
   %vmovlps\t{%2, %H0|%H0, %2}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3367
 
3368
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Result = { op1[2], op2[2], op1[3], op2[3],
;;            op1[6], op2[6], op1[7], op2[7] }.
(define_insn "avx_unpckhps256"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "register_operand" "x")
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 2) (const_int 10)
                     (const_int 3) (const_int 11)
                     (const_int 6) (const_int 14)
                     (const_int 7) (const_int 15)])))]
  "TARGET_AVX"
  "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
3384
 
3385
;; Full-width interleave of the high halves of two V8SF operands, built
;; from the lane-local unpack patterns:
;;   op3 = in-lane low interleave  (indices 0,8,1,9,4,12,5,13)
;;   op4 = in-lane high interleave (indices 2,10,3,11,6,14,7,15)
;;   op0 = { op3[4..7], op4[4..7] }, i.e. the upper lanes of both.
;; Operands 3 and 4 are fresh V8SF temporaries.
(define_expand "vec_interleave_highv8sf"
  [(set (match_dup 3)
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "register_operand" "x")
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 8)
                     (const_int 1) (const_int 9)
                     (const_int 4) (const_int 12)
                     (const_int 5) (const_int 13)])))
   (set (match_dup 4)
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_dup 1)
            (match_dup 2))
          (parallel [(const_int 2) (const_int 10)
                     (const_int 3) (const_int 11)
                     (const_int 6) (const_int 14)
                     (const_int 7) (const_int 15)])))
   (set (match_operand:V8SF 0 "register_operand" "")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_dup 3)
            (match_dup 4))
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)])))]
 "TARGET_AVX"
{
  operands[3] = gen_reg_rtx (V8SFmode);
  operands[4] = gen_reg_rtx (V8SFmode);
})
3418
 
3419
;; Interleave the high halves of two V4SF operands:
;; result = { op1[2], op2[2], op1[3], op2[3] }.  Alternative 0 is the
;; two-operand SSE form (operand 1 tied to the destination), alternative
;; 1 the three-operand AVX form.
(define_insn "vec_interleave_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "register_operand" "0,x")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
          (parallel [(const_int 2) (const_int 6)
                     (const_int 3) (const_int 7)])))]
  "TARGET_SSE"
  "@
   unpckhps\t{%2, %0|%0, %2}
   vunpckhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V4SF")])
3435
 
3436
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Hence the selection below interleaves elements 0,1 of each operand
;; into the low lane and elements 4,5 into the high lane:
;;   op0 = { a0, b0, a1, b1, a4, b4, a5, b5 }.
(define_insn "avx_unpcklps256"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "register_operand" "x")
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 8)
                     (const_int 1) (const_int 9)
                     (const_int 4) (const_int 12)
                     (const_int 5) (const_int 13)])))]
  "TARGET_AVX"
  "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
3452
 
3453
;; Interleave the low halves of two V8SF vectors (a = operand 1,
;; b = operand 2):  op0 = { a0, b0, a1, b1, a2, b2, a3, b3 }.
;; The result is built in three sets:
;;   tmp3 = { a0, b0, a1, b1, a4, b4, a5, b5 }  (same selection as
;;          avx_unpcklps256)
;;   tmp4 = { a2, b2, a3, b3, a6, b6, a7, b7 }
;;   op0  = elements 0..3 of tmp3 followed by elements 0..3 of tmp4.
(define_expand "vec_interleave_lowv8sf"
  [(set (match_dup 3)
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "register_operand" "x")
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 8)
                     (const_int 1) (const_int 9)
                     (const_int 4) (const_int 12)
                     (const_int 5) (const_int 13)])))
   (set (match_dup 4)
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_dup 1)
            (match_dup 2))
          (parallel [(const_int 2) (const_int 10)
                     (const_int 3) (const_int 11)
                     (const_int 6) (const_int 14)
                     (const_int 7) (const_int 15)])))
   (set (match_operand:V8SF 0 "register_operand" "")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_dup 3)
            (match_dup 4))
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)])))]
 "TARGET_AVX"
{
  /* Fresh pseudos for the two intermediate shuffles.  */
  operands[3] = gen_reg_rtx (V8SFmode);
  operands[4] = gen_reg_rtx (V8SFmode);
})
3486
 
3487
;; Interleave the low halves of two V4SF vectors:
;;   op0 = { op1[0], op2[0], op1[1], op2[1] }.
;; Alternative 0 is the two-operand SSE form (operand 1 tied to the
;; destination); alternative 1 is the three-operand VEX form.
(define_insn "vec_interleave_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "register_operand" "0,x")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 1) (const_int 5)])))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   vunpcklps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V4SF")])
3503
 
3504
;; These are modeled with the same vec_concat as the others so that we
;; capture users of shufps that can use the new instructions.
;;
;; Duplicate the odd-indexed elements of operand 1:
;;   op0 = { s1, s1, s3, s3, s5, s5, s7, s7 }.
(define_insn "avx_movshdup256"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "nonimmediate_operand" "xm")
            (match_dup 1))
          (parallel [(const_int 1) (const_int 1)
                     (const_int 3) (const_int 3)
                     (const_int 5) (const_int 5)
                     (const_int 7) (const_int 7)])))]
  "TARGET_AVX"
  "vmovshdup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
3521
 
3522
;; Duplicate the odd-indexed elements of operand 1:
;;   op0 = { s1, s1, s3, s3 }.
;; Operand 1 is concatenated with itself, so index 7 designates element 3
;; of the second copy; this keeps the shape "two selectors from the first
;; half, two from the second half" used by the shufps patterns.
(define_insn "sse3_movshdup"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (match_dup 1))
          (parallel [(const_int 1)
                     (const_int 1)
                     (const_int 7)
                     (const_int 7)])))]
  "TARGET_SSE3"
  "%vmovshdup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V4SF")])
3538
 
3539
;; Duplicate the even-indexed elements of operand 1:
;;   op0 = { s0, s0, s2, s2, s4, s4, s6, s6 }.
(define_insn "avx_movsldup256"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "nonimmediate_operand" "xm")
            (match_dup 1))
          (parallel [(const_int 0) (const_int 0)
                     (const_int 2) (const_int 2)
                     (const_int 4) (const_int 4)
                     (const_int 6) (const_int 6)])))]
  "TARGET_AVX"
  "vmovsldup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
3554
 
3555
;; Duplicate the even-indexed elements of operand 1:
;;   op0 = { s0, s0, s2, s2 }.
;; Operand 1 is concatenated with itself, so index 6 designates element 2
;; of the second copy.
(define_insn "sse3_movsldup"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (match_dup 1))
          (parallel [(const_int 0)
                     (const_int 0)
                     (const_int 6)
                     (const_int 6)])))]
  "TARGET_SSE3"
  "%vmovsldup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V4SF")])
3571
 
3572
;; Expand a 256-bit shufps given its 8-bit immediate (operand 3).
;; The immediate is split into four 2-bit fields and turned into the
;; eight explicit element selectors expected by avx_shufps256_1:
;;   fields 0,1 index operand 1 (selectors 0..3, and +4 for the high lane);
;;   fields 2,3 index operand 2 (selectors 8..11, and 12..15 for the
;;   high lane).
(define_expand "avx_shufps256"
  [(match_operand:V8SF 0 "register_operand" "")
   (match_operand:V8SF 1 "register_operand" "")
   (match_operand:V8SF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_AVX"
{
  int mask = INTVAL (operands[3]);
  /* Low-lane selectors first, then the same fields offset by 4 for the
     high lane.  */
  emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
                                  GEN_INT ((mask >> 0) & 3),
                                  GEN_INT ((mask >> 2) & 3),
                                  GEN_INT (((mask >> 4) & 3) + 8),
                                  GEN_INT (((mask >> 6) & 3) + 8),
                                  GEN_INT (((mask >> 0) & 3) + 4),
                                  GEN_INT (((mask >> 2) & 3) + 4),
                                  GEN_INT (((mask >> 4) & 3) + 12),
                                  GEN_INT (((mask >> 6) & 3) + 12)));
  DONE;
})
3591
 
3592
;; Each 2-bit field of the immediate selects two elements, one per
;; 128-bit lane.  The insn condition therefore requires every high-lane
;; selector (operands 7..10) to equal the matching low-lane selector
;; (operands 3..6) plus 4; the immediate is rebuilt from operands 3..6 only.
(define_insn "avx_shufps256_1"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "register_operand" "x")
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
          (parallel [(match_operand 3  "const_0_to_3_operand"   "")
                     (match_operand 4  "const_0_to_3_operand"   "")
                     (match_operand 5  "const_8_to_11_operand"  "")
                     (match_operand 6  "const_8_to_11_operand"  "")
                     (match_operand 7  "const_4_to_7_operand"   "")
                     (match_operand 8  "const_4_to_7_operand"   "")
                     (match_operand 9  "const_12_to_15_operand" "")
                     (match_operand 10 "const_12_to_15_operand" "")])))]
  "TARGET_AVX
   && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
       && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
       && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
       && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
{
  /* Re-encode the four low-lane selectors as the 8-bit immediate.  */
  int mask;
  mask = INTVAL (operands[3]);
  mask |= INTVAL (operands[4]) << 2;
  mask |= (INTVAL (operands[5]) - 8) << 4;
  mask |= (INTVAL (operands[6]) - 8) << 6;
  operands[3] = GEN_INT (mask);

  return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
3626
 
3627
;; Expand a 128-bit shufps given its 8-bit immediate (operand 3).
;; The immediate is split into four 2-bit fields: fields 0,1 become
;; selectors 0..3 into operand 1, fields 2,3 become selectors 4..7 into
;; operand 2, as expected by the V4SF instance of the shufps insn.
(define_expand "sse_shufps"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")
   (match_operand:V4SF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_SSE"
{
  int mask = INTVAL (operands[3]);
  emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
                               GEN_INT ((mask >> 0) & 3),
                               GEN_INT ((mask >> 2) & 3),
                               GEN_INT (((mask >> 4) & 3) + 4),
                               GEN_INT (((mask >> 6) & 3) + 4)));
  DONE;
})
3642
 
3643
;; 128-bit shufps over the VI4F_128 modes.  Operands 3,4 select from
;; operand 1 (0..3) and operands 5,6 select from operand 2 (4..7); the
;; output code packs them back into the 8-bit immediate.
;; The pattern name carries the mode suffix because other patterns in
;; this file call gen_sse_shufps_v4sf.
;; NOTE(review): the <mode> and <ssedoublevecmode> substitutions were
;; missing in this copy of the file; confirm the attribute name against
;; the mode-attribute definitions earlier in the file.
(define_insn "sse_shufps_<mode>"
  [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
        (vec_select:VI4F_128
          (vec_concat:<ssedoublevecmode>
            (match_operand:VI4F_128 1 "register_operand" "0,x")
            (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
          (parallel [(match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_4_to_7_operand" "")
                     (match_operand 6 "const_4_to_7_operand" "")])))]
  "TARGET_SSE"
{
  /* Re-encode the four selectors as the 8-bit immediate.  */
  int mask = 0;
  mask |= INTVAL (operands[3]) << 0;
  mask |= INTVAL (operands[4]) << 2;
  mask |= (INTVAL (operands[5]) - 4) << 4;
  mask |= (INTVAL (operands[6]) - 4) << 6;
  operands[3] = GEN_INT (mask);

  switch (which_alternative)
    {
    case 0:
      return "shufps\t{%3, %2, %0|%0, %2, %3}";
    case 1:
      return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V4SF")])
3677
 
3678
;; Extract the high two elements (2,3) of a V4SF into a V2SF.
;; Alternatives: store to memory, register-to-register, and load from
;; an offsettable memory operand (%H1 addresses its high half).
(define_insn "sse_storehps"
  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:V2SF
          (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
          (parallel [(const_int 2) (const_int 3)])))]
  "TARGET_SSE"
  "@
   %vmovhps\t{%1, %0|%0, %1}
   %vmovhlps\t{%1, %d0|%d0, %1}
   %vmovlps\t{%H1, %d0|%d0, %H1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
3691
 
3692
;; Expander for sse_loadhps: keep the low two elements of operand 1 and
;; replace the high two with operand 2.  The operands are first passed
;; through ix86_fixup_binary_operands; if that returns a destination
;; other than operands[0], the result is copied back afterwards.
(define_expand "sse_loadhps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
        (vec_concat:V4SF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "")
            (parallel [(const_int 0) (const_int 1)]))
          (match_operand:V2SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  DONE;
})
3711
 
3712
;; op0 = { op1[0], op1[1], op2[0], op2[1] }.
;; Alternatives 0/1 load operand 2 from memory (SSE / VEX forms),
;; alternatives 2/3 take it from a register, and alternative 4 stores
;; operand 2 into the high half (%H0) of an offsettable memory
;; destination that is tied to operand 1.
(define_insn "sse_loadhps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,x,x,o")
        (vec_concat:V4SF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
            (parallel [(const_int 0) (const_int 1)]))
          (match_operand:V2SF 2 "nonimmediate_operand"   " m,m,x,x,x")))]
  "TARGET_SSE"
  "@
   movhps\t{%2, %0|%0, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}
   movlhps\t{%2, %0|%0, %2}
   vmovlhps\t{%2, %1, %0|%0, %1, %2}
   %vmovlps\t{%2, %H0|%H0, %2}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
   (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
3730
 
3731
;; Extract the low two elements (0,1) of a V4SF into a V2SF.
;; Alternatives: store to memory, register-to-register move of the
;; whole register, and load from memory.
(define_insn "sse_storelps"
  [(set (match_operand:V2SF 0 "nonimmediate_operand"   "=m,x,x")
        (vec_select:V2SF
          (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE"
  "@
   %vmovlps\t{%1, %0|%0, %1}
   %vmovaps\t{%1, %0|%0, %1}
   %vmovlps\t{%1, %d0|%d0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
3744
 
3745
;; Expander for sse_loadlps: keep the high two elements of operand 1 and
;; replace the low two with operand 2.  The operands are first passed
;; through ix86_fixup_binary_operands; if that returns a destination
;; other than operands[0], the result is copied back afterwards.
(define_expand "sse_loadlps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
        (vec_concat:V4SF
          (match_operand:V2SF 2 "nonimmediate_operand" "")
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "")
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_SSE"
{
  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  DONE;
})
3764
 
3765
;; op0 = { op2[0], op2[1], op1[2], op1[3] }.
;; Alternatives:
;;   0  shufps  $0xe4  - operand 2 in the destination register, high
;;                       half taken from register operand 1
;;   1  vshufps $0xe4  - three-operand VEX form of the above
;;   2  movlps         - operand 2 loaded from memory, operand 1 tied
;;   3  vmovlps        - VEX load form; operand 2 must be memory, since
;;                       the three-operand vmovlps only accepts an m64
;;                       source (the register/register AVX case is
;;                       already covered by alternative 1)
;;   4  %vmovlps       - store operand 2 into the low half of a memory
;;                       destination tied to operand 1
(define_insn "sse_loadlps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,x,x,m")
        (vec_concat:V4SF
          (match_operand:V2SF 2 "nonimmediate_operand"   " 0,x,m,m,x")
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_SSE"
  "@
   shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
   vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
   movlps\t{%2, %0|%0, %2}
   vmovlps\t{%2, %1, %0|%0, %1, %2}
   %vmovlps\t{%2, %0|%0, %2}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*")
   (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
   (set_attr "length_immediate" "1,1,*,*,*")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3784
 
3785
;; Replace element 0 of operand 1 with element 0 of operand 2
;; (vec_merge mask 1 takes only element 0 from the first vec_merge
;; operand).  Alternative 0 ties operand 1 to the destination.
(define_insn "sse_movss"
  [(set (match_operand:V4SF 0 "register_operand"   "=x,x")
        (vec_merge:V4SF
          (match_operand:V4SF 2 "register_operand" " x,x")
          (match_operand:V4SF 1 "register_operand" " 0,x")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   movss\t{%2, %0|%0, %2}
   vmovss\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "SF")])
3799
 
3800
;; Broadcast element 0 of a V4SF register to every element of a VF1
;; vector.
;; NOTE(review): the <mode> suffix and the "<MODE>" attribute value were
;; missing in this copy of the file; confirm against the iterator
;; definitions earlier in the file.
(define_insn "avx2_vec_dup<mode>"
  [(set (match_operand:VF1 0 "register_operand" "=x")
        (vec_duplicate:VF1
          (vec_select:SF
            (match_operand:V4SF 1 "register_operand" "x")
            (parallel [(const_int 0)]))))]
  "TARGET_AVX2"
  "vbroadcastss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<MODE>")])
3811
 
3812
;; Broadcast a scalar SF to all four elements of a V4SF.
;; Alternatives: AVX register source (vshufps with itself), AVX memory
;; source (vbroadcastss), and non-AVX with the source tied to the
;; destination (shufps with itself).
(define_insn "vec_dupv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
        (vec_duplicate:V4SF
          (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
  "TARGET_SSE"
  "@
   vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
   vbroadcastss\t{%1, %0|%0, %1}
   shufps\t{$0, %0, %0|%0, %0, 0}"
  [(set_attr "isa" "avx,avx,noavx")
   (set_attr "type" "sselog1,ssemov,sselog1")
   (set_attr "length_immediate" "1,0,1")
   (set_attr "prefix_extra" "0,1,*")
   (set_attr "prefix" "vex,vex,orig")
   (set_attr "mode" "V4SF")])
3827
 
3828
;; Although insertps takes register source, we prefer
;; unpcklps with register source since it is shorter.
;;
;; Build a V2SF from two SF scalars.  Alternatives 0-3 combine two
;; values (unpcklps for a register operand 2, insertps for a memory
;; operand 2), alternative 4 loads operand 1 with operand 2 being zero,
;; and alternatives 5-6 are the MMX-register forms.
(define_insn "*vec_concatv2sf_sse4_1"
  [(set (match_operand:V2SF 0 "register_operand"     "=x,x,x,x,x,*y ,*y")
        (vec_concat:V2SF
          (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
          (match_operand:SF 2 "vector_move_operand"  " x,x,m,m,C,*ym, C")))]
  "TARGET_SSE4_1"
  "@
   unpcklps\t{%2, %0|%0, %2}
   vunpcklps\t{%2, %1, %0|%0, %1, %2}
   insertps\t{$0x10, %2, %0|%0, %2, 0x10}
   vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
   %vmovss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
   (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "prefix_data16" "*,*,1,*,*,*,*")
   (set_attr "prefix_extra" "*,*,1,1,*,*,*")
   (set_attr "length_immediate" "*,*,1,1,*,*,*")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
   (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
3851
 
3852
;; ??? In theory we can match memory for the MMX alternative, but allowing
;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
;; alternatives pretty much forces the MMX alternative to be chosen.
;;
;; Build a V2SF from two SF scalars without SSE4.1.  Alternatives 0-1
;; use SSE registers, alternatives 2-3 use MMX registers; alternatives
;; 1 and 3 load operand 1 from memory with operand 2 being zero.
(define_insn "*vec_concatv2sf_sse"
  [(set (match_operand:V2SF 0 "register_operand"     "=x,x,*y,*y")
        (vec_concat:V2SF
          (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
          (match_operand:SF 2 "reg_or_0_operand"     " x,C,*y, C")))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   movss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "V4SF,SF,DI,DI")])
3868
 
3869
;; Build a V4SF from two V2SF halves: operand 1 supplies the low half,
;; operand 2 the high half.  Alternatives 0/1 take operand 2 from a
;; register (movlhps), alternatives 2/3 from memory (movhps); each
;; comes in an SSE and a VEX form.
(define_insn "*vec_concatv4sf"
  [(set (match_operand:V4SF 0 "register_operand"       "=x,x,x,x")
        (vec_concat:V4SF
          (match_operand:V2SF 1 "register_operand"     " 0,x,0,x")
          (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
  "TARGET_SSE"
  "@
   movlhps\t{%2, %0|%0, %2}
   vmovlhps\t{%2, %1, %0|%0, %1, %2}
   movhps\t{%2, %0|%0, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "orig,vex,orig,vex")
   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
3884
 
3885
;; Standard vec_init expander for the 128-bit vector modes (V_128);
;; all the work is delegated to ix86_expand_vector_init.
;; NOTE(review): the <mode> suffix was missing in this copy of the file;
;; it is needed so that one expander is generated per V_128 mode.
(define_expand "vec_init<mode>"
  [(match_operand:V_128 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_init (false, operands[0], operands[1]);
  DONE;
})
3893
 
3894
;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c
;;
;; Set element 0 of a VI4F_128 vector to scalar operand 2, taking the
;; remaining elements from operand 1 (zero in alternatives 0-3, where
;; operand 1 uses constraint C).  Alternatives 8-10 have a memory
;; destination and emit "#", i.e. they are expected to be split.
;; NOTE(review): the <mode>, <ssescalarmode> and <ssescalarmodesuffix>
;; substitutions were missing in this copy of the file; confirm the
;; attribute names against the definitions earlier in the file.
(define_insn "vec_set<mode>_0"
  [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
          "=x,x,x ,x,x,x,x  ,x  ,m,m ,m")
        (vec_merge:VI4F_128
          (vec_duplicate:VI4F_128
            (match_operand:<ssescalarmode> 2 "general_operand"
          " x,m,*r,m,x,x,*rm,*rm,x,fF,*r"))
          (match_operand:VI4F_128 1 "vector_move_operand"
          " C,C,C ,C,0,x,0  ,x  ,0,0 ,0")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
   %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
   %vmovd\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   vmovss\t{%2, %1, %0|%0, %1, %2}
   pinsrd\t{$0, %2, %0|%0, %2, 0}
   vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
   #
   #
   #"
  [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
   (set (attr "type")
     (cond [(eq_attr "alternative" "0,6,7")
              (const_string "sselog")
            (eq_attr "alternative" "9")
              (const_string "fmov")
            (eq_attr "alternative" "10")
              (const_string "imov")
           ]
           (const_string "ssemov")))
   (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
   (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
   (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
   (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
3933
 
3934
;; A subset is vec_setv4sf.
;;
;; Insert scalar operand 2 into one element of operand 1.  Operand 3 is
;; the vec_merge mask; the insn condition requires exact_log2 of it to
;; be a valid V4SF element index (0..3), i.e. exactly one element is
;; replaced.  That index, shifted left by 4, becomes the insertps
;; immediate.
(define_insn "*vec_setv4sf_sse4_1"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
          (match_operand:V4SF 1 "register_operand" "0,x")
          (match_operand:SI 3 "const_int_operand" "")))]
  "TARGET_SSE4_1
   && ((unsigned) exact_log2 (INTVAL (operands[3]))
       < GET_MODE_NUNITS (V4SFmode))"
{
  /* Convert the one-hot merge mask into the insertps immediate.  */
  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
  switch (which_alternative)
    {
    case 0:
      return "insertps\t{%3, %2, %0|%0, %2, %3}";
    case 1:
      return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V4SF")])
3964
 
3965
;; insertps with an explicit 8-bit immediate (operand 3), modeled as an
;; unspec.  When operand 2 is in memory, the top two bits of the
;; immediate (count_s) are folded into the address instead: the memory
;; reference is narrowed to SFmode at byte offset count_s * 4 and those
;; bits are cleared from the immediate.
(define_insn "sse4_1_insertps"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
                      (match_operand:V4SF 1 "register_operand" "0,x")
                      (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
                     UNSPEC_INSERTPS))]
  "TARGET_SSE4_1"
{
  if (MEM_P (operands[2]))
    {
      unsigned count_s = INTVAL (operands[3]) >> 6;
      if (count_s)
        operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
      operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
    }
  switch (which_alternative)
    {
    case 0:
      return "insertps\t{%3, %2, %0|%0, %2, %3}";
    case 1:
      return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V4SF")])
3997
 
3998
;; After reload, setting element 0 of a vector that lives in memory
;; (with the other elements unchanged, via match_dup 0) is just a
;; scalar store to the start of that memory location.
;; NOTE(review): the <ssescalarmode> substitutions were missing in this
;; copy of the file; confirm the attribute name against the definitions
;; earlier in the file.
(define_split
  [(set (match_operand:VI4F_128 0 "memory_operand" "")
        (vec_merge:VI4F_128
          (vec_duplicate:VI4F_128
            (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
          (match_dup 0)
          (const_int 1)))]
  "TARGET_SSE && reload_completed"
  [(const_int 0)]
{
  emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
                  operands[1]);
  DONE;
})
4012
 
4013
;; Standard vec_set expander: store scalar operand 1 into element
;; number operand 2 of vector operand 0.  All the work is delegated to
;; ix86_expand_vector_set.
;; NOTE(review): the <mode> suffix and the <ssescalarmode> operand mode
;; were missing in this copy of the file; confirm the attribute name
;; against the definitions earlier in the file.
(define_expand "vec_set<mode>"
  [(match_operand:V 0 "register_operand" "")
   (match_operand:<ssescalarmode> 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_set (false, operands[0], operands[1],
                          INTVAL (operands[2]));
  DONE;
})
4023
 
4024
;; Extract element 0 of a V4SF.  Always emitted as "#" and split after
;; reload into a plain SFmode move from the low part of operand 1
;; (the same hard register viewed in SFmode, or the low part of the
;; memory operand).  Memory-to-memory is rejected by the condition.
(define_insn_and_split "*vec_extractv4sf_0"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
        (vec_select:SF
          (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
          (parallel [(const_int 0)])))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op1 = operands[1];
  if (REG_P (op1))
    op1 = gen_rtx_REG (SFmode, REGNO (op1));
  else
    op1 = gen_lowpart (SFmode, op1);
  emit_move_insn (operands[0], op1);
  DONE;
})
4042
 
4043
;; Extract element operand 2 (0..3) of a V4SF.
;; Alternative 0 (general register or memory destination) emits
;; extractps directly.  Alternatives 1-2 (SSE register destination) are
;; split after reload into a shuffle that moves the wanted element into
;; position 0 of the destination register:
;;   index 1 or 3 - shufps with all four selectors naming that element;
;;   index 2      - vec_interleave_highv4sf of operand 1 with itself.
(define_insn_and_split "*sse4_1_extractps"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
        (vec_select:SF
          (match_operand:V4SF 1 "register_operand" "x,0,x")
          (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
  "TARGET_SSE4_1"
  "@
   %vextractps\t{%2, %1, %0|%0, %1, %2}
   #
   #"
  "&& reload_completed && SSE_REG_P (operands[0])"
  [(const_int 0)]
{
  /* View the SF destination register as a full V4SF register.  */
  rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
  switch (INTVAL (operands[2]))
    {
    case 1:
    case 3:
      emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
                                      operands[2], operands[2],
                                      GEN_INT (INTVAL (operands[2]) + 4),
                                      GEN_INT (INTVAL (operands[2]) + 4)));
      break;
    case 2:
      emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
      break;
    default:
      /* 0 should be handled by the *vec_extractv4sf_0 pattern above.  */
      gcc_unreachable ();
    }
  DONE;
}
  [(set_attr "isa" "*,noavx,avx")
   (set_attr "type" "sselog,*,*")
   (set_attr "prefix_data16" "1,*,*")
   (set_attr "prefix_extra" "1,*,*")
   (set_attr "length_immediate" "1,*,*")
   (set_attr "prefix" "maybe_vex,*,*")
   (set_attr "mode" "V4SF,*,*")])
4082
 
4083
;; Extract element operand 2 of a V4SF that lives in offsettable
;; memory.  Split after reload into a scalar SFmode load from byte
;; offset 4 * index within the vector.
(define_insn_and_split "*vec_extract_v4sf_mem"
  [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
       (vec_select:SF
         (match_operand:V4SF 1 "memory_operand" "o,o,o")
         (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
  "TARGET_SSE"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  int i = INTVAL (operands[2]);

  emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
  DONE;
})
4098
 
4099
;; Extract one 128-bit half of a 256-bit vector.  Operand 2 (0 or 1)
;; selects the low or high half and dispatches to the matching
;; vec_extract_lo_/vec_extract_hi_ pattern for the vector mode.
;; NOTE(review): in this copy of the file the <mode>, <ssehalfvecmode>,
;; <i128> and <sseinsnmode> substitutions were missing from this
;; expander and from the four patterns below (leaving two pairs of
;; identically named patterns).  They are restored together here
;; because the expander's gen_* references and the pattern names must
;; agree; confirm the attribute names against the definitions earlier
;; in the file.
(define_expand "avx_vextractf128<mode>"
  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "")
   (match_operand:V_256 1 "register_operand" "")
   (match_operand:SI 2 "const_0_to_1_operand" "")]
  "TARGET_AVX"
{
  rtx (*insn)(rtx, rtx);

  switch (INTVAL (operands[2]))
    {
    case 0:
      insn = gen_vec_extract_lo_<mode>;
      break;
    case 1:
      insn = gen_vec_extract_hi_<mode>;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (insn (operands[0], operands[1]));
  DONE;
})

;; Low half (elements 0,1) of a four-element 256-bit vector.  Split
;; after reload into a plain move of the low part of operand 1.
(define_insn_and_split "vec_extract_lo_<mode>"
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
        (vec_select:<ssehalfvecmode>
          (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op1 = operands[1];
  if (REG_P (op1))
    op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
  else
    op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
  emit_move_insn (operands[0], op1);
  DONE;
})

;; High half (elements 2,3) of a four-element 256-bit vector, to a
;; register or to memory.
(define_insn "vec_extract_hi_<mode>"
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
        (vec_select:<ssehalfvecmode>
          (match_operand:VI8F_256 1 "register_operand" "x,x")
          (parallel [(const_int 2) (const_int 3)])))]
  "TARGET_AVX"
  "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Low half (elements 0..3) of an eight-element 256-bit vector.  Split
;; after reload into a plain move of the low part of operand 1.
(define_insn_and_split "vec_extract_lo_<mode>"
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
        (vec_select:<ssehalfvecmode>
          (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op1 = operands[1];
  if (REG_P (op1))
    op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
  else
    op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
  emit_move_insn (operands[0], op1);
  DONE;
})

;; High half (elements 4..7) of an eight-element 256-bit vector, to a
;; register or to memory.
(define_insn "vec_extract_hi_<mode>"
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
        (vec_select:<ssehalfvecmode>
          (match_operand:VI4F_256 1 "register_operand" "x,x")
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "TARGET_AVX"
  "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
4190
 
4191
;; Low half (elements 0..7) of a V16HI.  Always emitted as "#" and split
;; after reload into a plain V8HI move of the low part of operand 1.
;; Memory-to-memory is rejected by the condition.
(define_insn_and_split "vec_extract_lo_v16hi"
  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
        (vec_select:V8HI
          (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op1 = operands[1];
  if (REG_P (op1))
    op1 = gen_rtx_REG (V8HImode, REGNO (op1));
  else
    op1 = gen_lowpart (V8HImode, op1);
  emit_move_insn (operands[0], op1);
  DONE;
})
4212
 
4213
;; High half (elements 8..15) of a V16HI, to a register or to memory,
;; using the 128-bit lane extract with immediate 1.
(define_insn "vec_extract_hi_v16hi"
  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
        (vec_select:V8HI
          (match_operand:V16HI 1 "register_operand" "x,x")
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)])))]
  "TARGET_AVX"
  "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
4229
 
4230
;; Low half (elements 0..15) of a V32QI.  Always emitted as "#" and
;; split after reload into a plain V16QI move of the low part of
;; operand 1.  Memory-to-memory is rejected by the condition.
(define_insn_and_split "vec_extract_lo_v32qi"
  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
        (vec_select:V16QI
          (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)])))]
  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op1 = operands[1];
  if (REG_P (op1))
    op1 = gen_rtx_REG (V16QImode, REGNO (op1));
  else
    op1 = gen_lowpart (V16QImode, op1);
  emit_move_insn (operands[0], op1);
  DONE;
})
4255
 
4256
;; Extract the upper sixteen QImode elements (indices 16-31) of a V32QI
;; register into a V16QI register or memory destination.  Output as a
;; 128-bit extract with immediate 1.
(define_insn "vec_extract_hi_v32qi"
  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
        (vec_select:V16QI
          (match_operand:V32QI 1 "register_operand" "x,x")
          (parallel [(const_int 16) (const_int 17)
                     (const_int 18) (const_int 19)
                     (const_int 20) (const_int 21)
                     (const_int 22) (const_int 23)
                     (const_int 24) (const_int 25)
                     (const_int 26) (const_int 27)
                     (const_int 28) (const_int 29)
                     (const_int 30) (const_int 31)])))]
  "TARGET_AVX"
  "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Modes handled by vec_extract patterns.  The 256-bit modes are only
;; enabled under TARGET_AVX; the 128-bit modes carry no extra condition.
(define_mode_iterator VEC_EXTRACT_MODE
  [(V32QI "TARGET_AVX") V16QI
   (V16HI "TARGET_AVX") V8HI
   (V8SI "TARGET_AVX") V4SI
   (V4DI "TARGET_AVX") V2DI
   (V8SF "TARGET_AVX") V4SF
   (V4DF "TARGET_AVX") V2DF])

;; Expander for element extraction: operand 2 is the constant index of
;; the element of vector operand 1 to place in scalar register operand 0.
;; All of the work is delegated to ix86_expand_vector_extract.
;; NOTE(review): the <mode> / <ssescalarmode> iterator attributes were
;; reconstructed from the VEC_EXTRACT_MODE iterator use -- confirm.
(define_expand "vec_extract<mode>"
  [(match_operand:<ssescalarmode> 0 "register_operand" "")
   (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_extract (false, operands[0], operands[1],
                              INTVAL (operands[2]));
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point element swizzling
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Selecting elements {1, 5, 3, 7} of the concatenation of operands 1 and
;; 2 pairs up the odd-numbered elements of both inputs:
;; result = { op1[1], op2[1], op1[3], op2[3] }.
(define_insn "avx_unpckhpd256"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "register_operand" "x")
            (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 1) (const_int 5)
                     (const_int 3) (const_int 7)])))]
  "TARGET_AVX"
  "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])

;; Full-width high interleave of two V4DF operands, built from three
;; steps because the unpck patterns used here only pair elements inside
;; each 128-bit half:
;;   operand 3 = { op1[0], op2[0], op1[2], op2[2] }
;;   operand 4 = { op1[1], op2[1], op1[3], op2[3] }
;;   operand 0 = elements {2, 3, 6, 7} of (operand 3, operand 4), i.e.
;;               { op1[2], op2[2], op1[3], op2[3] }.
;; Operands 3 and 4 are fresh pseudo registers created in the preparation
;; statements.
(define_expand "vec_interleave_highv4df"
  [(set (match_dup 3)
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "register_operand" "x")
            (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 2) (const_int 6)])))
   (set (match_dup 4)
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_dup 1)
            (match_dup 2))
          (parallel [(const_int 1) (const_int 5)
                     (const_int 3) (const_int 7)])))
   (set (match_operand:V4DF 0 "register_operand" "")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_dup 3)
            (match_dup 4))
          (parallel [(const_int 2) (const_int 3)
                     (const_int 6) (const_int 7)])))]
 "TARGET_AVX"
{
  operands[3] = gen_reg_rtx (V4DFmode);
  operands[4] = gen_reg_rtx (V4DFmode);
})

;; Interleave the high elements of two V2DF operands:
;; result = { op1[1], op2[1] }.  When the operand combination is rejected
;; by ix86_vec_interleave_v2df_operator_ok, operand 2 is forced into a
;; register before the pattern is emitted.
(define_expand "vec_interleave_highv2df"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" "")
            (match_operand:V2DF 2 "nonimmediate_operand" ""))
          (parallel [(const_int 1)
                     (const_int 3)])))]
  "TARGET_SSE2"
{
  if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
    operands[2] = force_reg (V2DFmode, operands[2]);
})

;; Matcher for the V2DF high interleave, result = { op1[1], op2[1] }.
;; Alternatives, in order:
;;   0, 1  register forms (unpckhpd / vunpckhpd);
;;   2     operand 2 is the same memory as operand 1: duplicate the DF at
;;         the high-half address %H1 with movddup;
;;   3, 4  operand 1 in memory: load its high half (%H1) into the low
;;         element with movlpd / vmovlpd;
;;   5     destination in memory and tied to operand 2: store the high
;;         element of operand 1 with movhpd.
(define_insn "*vec_interleave_highv2df"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,x,x,x,m")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
            (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
          (parallel [(const_int 1)
                     (const_int 3)])))]
  "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
  "@
   unpckhpd\t{%2, %0|%0, %2}
   vunpckhpd\t{%2, %1, %0|%0, %1, %2}
   %vmovddup\t{%H1, %0|%0, %H1}
   movlpd\t{%H1, %0|%0, %H1}
   vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
   %vmovhpd\t{%1, %0|%0, %1}"
  [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
   (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
   (set_attr "prefix_data16" "*,*,*,1,*,1")
   (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
   (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])

;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander producing { op1[0], op1[0], op1[2], op1[2] }: the same
;; operand is concatenated with itself and elements {0, 4, 2, 6} are
;; selected.  There are no preparation statements.
(define_expand "avx_movddup256"
  [(set (match_operand:V4DF 0 "register_operand" "")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "nonimmediate_operand" "")
            (match_dup 1))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 2) (const_int 6)])))]
  "TARGET_AVX")

;; Expander producing { op1[0], op2[0], op1[2], op2[2] } from two V4DF
;; operands (elements {0, 4, 2, 6} of their concatenation).  There are no
;; preparation statements.
(define_expand "avx_unpcklpd256"
  [(set (match_operand:V4DF 0 "register_operand" "")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "register_operand" "")
            (match_operand:V4DF 2 "nonimmediate_operand" ""))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 2) (const_int 6)])))]
  "TARGET_AVX")

;; Matcher for the selection {0, 4, 2, 6} over two V4DF operands.
;; Alternative 0 is the general two-operand vunpcklpd.  Alternative 1
;; covers operand 1 in memory with operand 2 tied to it (constraint "1"),
;; which is output as a single-source vmovddup.
(define_insn "*avx_unpcklpd256"
  [(set (match_operand:V4DF 0 "register_operand"         "=x,x")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "nonimmediate_operand" " x,m")
            (match_operand:V4DF 2 "nonimmediate_operand" "xm,1"))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 2) (const_int 6)])))]
  "TARGET_AVX"
  "@
   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
   vmovddup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])

;; Full-width low interleave of two V4DF operands, built from three
;; steps:
;;   operand 3 = { op1[0], op2[0], op1[2], op2[2] }
;;   operand 4 = { op1[1], op2[1], op1[3], op2[3] }
;;   operand 0 = elements {0, 1, 4, 5} of (operand 3, operand 4), i.e.
;;               { op1[0], op2[0], op1[1], op2[1] }.
;; Operands 3 and 4 are fresh pseudo registers created in the preparation
;; statements.
(define_expand "vec_interleave_lowv4df"
  [(set (match_dup 3)
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "register_operand" "x")
            (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 2) (const_int 6)])))
   (set (match_dup 4)
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_dup 1)
            (match_dup 2))
          (parallel [(const_int 1) (const_int 5)
                     (const_int 3) (const_int 7)])))
   (set (match_operand:V4DF 0 "register_operand" "")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_dup 3)
            (match_dup 4))
          (parallel [(const_int 0) (const_int 1)
                     (const_int 4) (const_int 5)])))]
 "TARGET_AVX"
{
  operands[3] = gen_reg_rtx (V4DFmode);
  operands[4] = gen_reg_rtx (V4DFmode);
})

;; Interleave the low elements of two V2DF operands:
;; result = { op1[0], op2[0] }.  When the operand combination is rejected
;; by ix86_vec_interleave_v2df_operator_ok, operand 1 is forced into a
;; register before the pattern is emitted.
(define_expand "vec_interleave_lowv2df"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" "")
            (match_operand:V2DF 2 "nonimmediate_operand" ""))
          (parallel [(const_int 0)
                     (const_int 2)])))]
  "TARGET_SSE2"
{
  if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
    operands[1] = force_reg (V2DFmode, operands[1]);
})

;; Matcher for the V2DF low interleave, result = { op1[0], op2[0] }.
;; Alternatives, in order:
;;   0, 1  register forms (unpcklpd / vunpcklpd);
;;   2     operand 2 is the same memory as operand 1: movddup of it;
;;   3, 4  operand 2 in memory: load it into the high element with
;;         movhpd / vmovhpd;
;;   5     destination in memory and tied to operand 1: store the low
;;         element of operand 2 at the high-half address %H0 with movlpd.
(define_insn "*vec_interleave_lowv2df"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,x,x,x,o")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
            (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
          (parallel [(const_int 0)
                     (const_int 2)])))]
  "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
  "@
   unpcklpd\t{%2, %0|%0, %2}
   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
   %vmovddup\t{%1, %0|%0, %1}
   movhpd\t{%2, %0|%0, %2}
   vmovhpd\t{%2, %1, %0|%0, %1, %2}
   %vmovlpd\t{%2, %H0|%H0, %2}"
  [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
   (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
   (set_attr "prefix_data16" "*,*,*,1,*,1")
   (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
   (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])

;; After reload, a store to memory of { op1[0], op1[0] } (the low element
;; of register operand 1 in both positions) is split into two DFmode
;; stores of that low element, at byte offsets 0 and 8 of the destination.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand" "")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "register_operand" "")
            (match_dup 1))
          (parallel [(const_int 0)
                     (const_int 2)])))]
  "TARGET_SSE3 && reload_completed"
  [(const_int 0)]
{
  /* DFmode view of the source register, i.e. its low element.  */
  rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
  emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
  emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
  DONE;
})

;; Broadcast of element I (0 or 1) of memory operand 1 into both elements
;; of a register destination.  The selectors must be I and I + 2, i.e.
;; the same element taken from each copy of operand 1.  The insn is
;; rewritten as a vec_duplicate of the DFmode memory at byte offset I * 8.
(define_split
  [(set (match_operand:V2DF 0 "register_operand" "")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "memory_operand" "")
            (match_dup 1))
          (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
                     (match_operand:SI 3 "const_int_operand" "")])))]
  "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
  [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
{
  operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
})

;; Expand a 256-bit shufpd whose selector is the immediate operand 3.
;; Each of the low four bits of the immediate chooses one result element;
;; the bits are translated into explicit element indices into the V8DF
;; concatenation of operands 1 and 2 and passed to avx_shufpd256_1:
;;   bit 0 -> index 0 or 1      bit 1 -> index 4 or 5
;;   bit 2 -> index 2 or 3      bit 3 -> index 6 or 7
(define_expand "avx_shufpd256"
  [(match_operand:V4DF 0 "register_operand" "")
   (match_operand:V4DF 1 "register_operand" "")
   (match_operand:V4DF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_AVX"
{
  int mask = INTVAL (operands[3]);
  emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
                                   GEN_INT (mask & 1),
                                   GEN_INT (mask & 2 ? 5 : 4),
                                   GEN_INT (mask & 4 ? 3 : 2),
                                   GEN_INT (mask & 8 ? 7 : 6)));
  DONE;
})

;; 256-bit shufpd with explicit element selectors.  Operands 3-6 are
;; constants restricted by their predicates to the ranges 0-1, 4-5, 2-3
;; and 6-7 respectively.  The output code folds them back into the 4-bit
;; immediate (one bit per selector) and overwrites operand 3 with it
;; before printing the instruction.
(define_insn "avx_shufpd256_1"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "register_operand" "x")
            (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
          (parallel [(match_operand 3 "const_0_to_1_operand" "")
                     (match_operand 4 "const_4_to_5_operand" "")
                     (match_operand 5 "const_2_to_3_operand" "")
                     (match_operand 6 "const_6_to_7_operand" "")])))]
  "TARGET_AVX"
{
  int mask;

  /* Subtract each selector's base index to get a single bit, then place
     it at the bit position of the corresponding result element.  */
  mask = INTVAL (operands[3]);
  mask |= (INTVAL (operands[4]) - 4) << 1;
  mask |= (INTVAL (operands[5]) - 2) << 2;
  mask |= (INTVAL (operands[6]) - 6) << 3;
  operands[3] = GEN_INT (mask);

  return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])

;; Expand a 128-bit shufpd whose selector is the immediate operand 3.
;; Bit 0 of the immediate becomes element index 0 or 1 and bit 1 becomes
;; index 2 or 3 in the V4DF concatenation of operands 1 and 2; the result
;; is emitted through gen_sse2_shufpd_v2df.
(define_expand "sse2_shufpd"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand:V2DF 1 "register_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int mask = INTVAL (operands[3]);
  emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
                                GEN_INT (mask & 1),
                                GEN_INT (mask & 2 ? 3 : 2)));
  DONE;
})

;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; V4DI selection {1, 5, 3, 7} over the concatenation of operands 1 and
;; 2: result = { op1[1], op2[1], op1[3], op2[3] }.
(define_insn "avx2_interleave_highv4di"
  [(set (match_operand:V4DI 0 "register_operand" "=x")
        (vec_select:V4DI
          (vec_concat:V8DI
            (match_operand:V4DI 1 "register_operand" "x")
            (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 1)
                     (const_int 5)
                     (const_int 3)
                     (const_int 7)])))]
  "TARGET_AVX2"
  "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Interleave the high elements of two V2DI operands:
;; result = { op1[1], op2[1] }.  Alternative 0 is the two-operand
;; non-AVX form with the destination tied to operand 1; alternative 1 is
;; the three-operand AVX form.
(define_insn "vec_interleave_highv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
        (vec_select:V2DI
          (vec_concat:V4DI
            (match_operand:V2DI 1 "register_operand" "0,x")
            (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
          (parallel [(const_int 1)
                     (const_int 3)])))]
  "TARGET_SSE2"
  "@
   punpckhqdq\t{%2, %0|%0, %2}
   vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])

;; V4DI selection {0, 4, 2, 6} over the concatenation of operands 1 and
;; 2: result = { op1[0], op2[0], op1[2], op2[2] }.
(define_insn "avx2_interleave_lowv4di"
  [(set (match_operand:V4DI 0 "register_operand" "=x")
        (vec_select:V4DI
          (vec_concat:V8DI
            (match_operand:V4DI 1 "register_operand" "x")
            (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0)
                     (const_int 4)
                     (const_int 2)
                     (const_int 6)])))]
  "TARGET_AVX2"
  "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Interleave the low elements of two V2DI operands:
;; result = { op1[0], op2[0] }.  Alternative 0 is the two-operand
;; non-AVX form with the destination tied to operand 1; alternative 1 is
;; the three-operand AVX form.
(define_insn "vec_interleave_lowv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
        (vec_select:V2DI
          (vec_concat:V4DI
            (match_operand:V2DI 1 "register_operand" "0,x")
            (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
          (parallel [(const_int 0)
                     (const_int 2)])))]
  "TARGET_SSE2"
  "@
   punpcklqdq\t{%2, %0|%0, %2}
   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])

;; 128-bit shufpd with explicit element selectors, for every mode in the
;; VI8F_128 iterator.  Operand 3 selects element 0 or 1 (from operand 1)
;; and operand 4 selects element 2 or 3 (from operand 2) of the
;; double-width concatenation.  The output code folds both selectors into
;; the 2-bit immediate and overwrites operand 3 with it.
;; NOTE(review): the <mode> / <ssedoublevecmode> iterator attributes were
;; reconstructed (the sse2_shufpd expander calls gen_sse2_shufpd_v2df) --
;; confirm the attribute name used for the concatenation mode.
(define_insn "sse2_shufpd_<mode>"
  [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
        (vec_select:VI8F_128
          (vec_concat:<ssedoublevecmode>
            (match_operand:VI8F_128 1 "register_operand" "0,x")
            (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
          (parallel [(match_operand 3 "const_0_to_1_operand" "")
                     (match_operand 4 "const_2_to_3_operand" "")])))]
  "TARGET_SSE2"
{
  int mask;
  mask = INTVAL (operands[3]);
  mask |= (INTVAL (operands[4]) - 2) << 1;
  operands[3] = GEN_INT (mask);

  switch (which_alternative)
    {
    case 0:
      /* Two-operand form: destination is tied to operand 1.  */
      return "shufpd\t{%3, %2, %0|%0, %2, %3}";
    case 1:
      /* Three-operand AVX form.  */
      return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V2DF")])

;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c
;;
;; Extract element 1 (the high DF) of a V2DF operand.  Alternative 0
;; stores it to memory with movhpd; alternatives 1 and 2 move it into the
;; low element of an SSE register with unpckhpd / vunpckhpd.  Alternatives
;; 3-5 (memory source, register destination) output "#" and therefore
;; have to be split before final output.
(define_insn "sse2_storehpd"
  [(set (match_operand:DF 0 "nonimmediate_operand"     "=m,x,x,x,*f,r")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
          (parallel [(const_int 1)])))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   %vmovhpd\t{%1, %0|%0, %1}
   unpckhpd\t%0, %0
   vunpckhpd\t{%d1, %0|%0, %d1}
   #
   #
   #"
  [(set_attr "isa" "*,noavx,avx,*,*,*")
   (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
   (set (attr "prefix_data16")
     (if_then_else
       (and (eq_attr "alternative" "0")
            (not (match_test "TARGET_AVX")))
       (const_string "1")
       (const_string "*")))
   (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
   (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])

;; After reload, extracting element 1 of a V2DF that lives in memory into
;; a register is rewritten as a plain DFmode load from byte offset 8 of
;; that memory.
(define_split
  [(set (match_operand:DF 0 "register_operand" "")
        (vec_select:DF
          (match_operand:V2DF 1 "memory_operand" "")
          (parallel [(const_int 1)])))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
  "operands[1] = adjust_address (operands[1], DFmode, 8);")

;; Extract element 1 of a V2DF when SSE is available but SSE2 is not,
;; using single-precision move instructions: movhps to store to memory,
;; movhlps between registers, and movlps from the high-half address %H1
;; of a memory source.
(define_insn "*vec_extractv2df_1_sse"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
          (parallel [(const_int 1)])))]
  "!TARGET_SSE2 && TARGET_SSE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movhps\t{%1, %0|%0, %1}
   movhlps\t{%1, %0|%0, %1}
   movlps\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])

;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c
;;
;; Extract element 0 (the low DF) of a V2DF operand.  Only alternative 0
;; (register to memory) is output directly, as movlpd; the remaining
;; alternatives output "#" and have to be split before final output.
(define_insn "sse2_storelpd"
  [(set (match_operand:DF 0 "nonimmediate_operand"     "=m,x,x,*f,r")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
          (parallel [(const_int 0)])))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   %vmovlpd\t{%1, %0|%0, %1}
   #
   #
   #
   #"
  [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
   (set_attr "prefix_data16" "1,*,*,*,*")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V1DF,DF,DF,DF,DF")])

;; After reload, extracting element 0 of a V2DF into a register is just a
;; DFmode move of the low part of the source.
(define_split
  [(set (match_operand:DF 0 "register_operand" "")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" "")
          (parallel [(const_int 0)])))]
  "TARGET_SSE2 && reload_completed"
  [(const_int 0)]
{
  rtx op1 = operands[1];

  /* View the source in DFmode: a register is re-created with the same
     register number, anything else goes through gen_lowpart.  */
  if (REG_P (op1))
    op1 = gen_rtx_REG (DFmode, REGNO (op1));
  else
    op1 = gen_lowpart (DFmode, op1);
  emit_move_insn (operands[0], op1);
  DONE;
})

;; Extract element 0 of a V2DF when SSE is available but SSE2 is not:
;; movlps for the register-to-memory and memory-to-register cases, and a
;; full movaps register copy for register to register.
(define_insn "*vec_extractv2df_0_sse"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
          (parallel [(const_int 0)])))]
  "!TARGET_SSE2 && TARGET_SSE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movlps\t{%1, %0|%0, %1}
   movaps\t{%1, %0|%0, %1}
   movlps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])

;; Expander wrapper around sse2_loadhpd: build { op1[0], op2 }, i.e.
;; replace the high element of V2DF operand 1 with DF operand 2.  The
;; operands are first passed through ix86_fixup_binary_operands, which
;; returns the destination to use for the insn; if that is not operand 0
;; the result is copied to operand 0 afterwards.
(define_expand "sse2_loadhpd_exp"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
        (vec_concat:V2DF
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "")
            (parallel [(const_int 0)]))
          (match_operand:DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);

  emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  DONE;
})

;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c
;;
;; Build { op1[0], op2 }: keep the low element of V2DF operand 1 and put
;; DF operand 2 into the high element.  Alternatives 0/1 load operand 2
;; from memory (movhpd / vmovhpd), alternatives 2/3 take it from an SSE
;; register (unpcklpd / vunpcklpd).  Alternatives 4-6 have a memory
;; destination tied to operand 1; they output "#" and have to be split
;; before final output.
(define_insn "sse2_loadhpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"
          "=x,x,x,x,o,o ,o")
        (vec_concat:V2DF
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand"
          " 0,x,0,x,0,0 ,0")
            (parallel [(const_int 0)]))
          (match_operand:DF 2 "nonimmediate_operand"
          " m,m,x,x,x,*f,r")))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movhpd\t{%2, %0|%0, %2}
   vmovhpd\t{%2, %1, %0|%0, %1, %2}
   unpcklpd\t{%2, %0|%0, %2}
   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
   #
   #
   #"
  [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
   (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
   (set_attr "prefix_data16" "1,*,*,*,*,*,*")
   (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
   (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])

;; After reload, replacing the high element of a V2DF in memory with a DF
;; register (the low element is the memory's own element 0, so it stays
;; unchanged) is rewritten as a DFmode store at byte offset 8.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand" "")
        (vec_concat:V2DF
          (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
          (match_operand:DF 1 "register_operand" "")))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
  "operands[0] = adjust_address (operands[0], DFmode, 8);")

;; Expander wrapper around sse2_loadlpd: build { op2, op1[1] }, i.e.
;; replace the low element of V2DF operand 1 with DF operand 2.  The
;; operands are first passed through ix86_fixup_binary_operands, which
;; returns the destination to use for the insn; if that is not operand 0
;; the result is copied to operand 0 afterwards.
(define_expand "sse2_loadlpd_exp"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
        (vec_concat:V2DF
          (match_operand:DF 2 "nonimmediate_operand" "")
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "")
            (parallel [(const_int 1)]))))]
  "TARGET_SSE2"
{
  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);

  emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  DONE;
})

;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c
;;
;; Build { op2, op1[1] }: put DF operand 2 into the low element and keep
;; the high element of V2DF operand 1.  Alternatives, in order:
;;   0      operand 1 matches constraint "C": plain movsd load from memory;
;;   1, 2   load operand 2 from memory into the low half (movlpd/vmovlpd);
;;   3, 4   operand 2 in an SSE register (movsd / vmovsd);
;;   5      destination tied to operand 2: shufpd with immediate 2;
;;   6, 7   operand 1 in memory: load its high half (%H1) with
;;          movhpd / vmovhpd;
;;   8-10   memory destination tied to operand 1; output "#" and have to
;;          be split before final output.
(define_insn "sse2_loadlpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"
          "=x,x,x,x,x,x,x,x,m,m ,m")
        (vec_concat:V2DF
          (match_operand:DF 2 "nonimmediate_operand"
          " m,m,m,x,x,0,0,x,x,*f,r")
          (vec_select:DF
            (match_operand:V2DF 1 "vector_move_operand"
          " C,0,x,0,x,x,o,o,0,0 ,0")
            (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   %vmovsd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   vmovlpd\t{%2, %1, %0|%0, %1, %2}
   movsd\t{%2, %0|%0, %2}
   vmovsd\t{%2, %1, %0|%0, %1, %2}
   shufpd\t{$2, %1, %0|%0, %1, 2}
   movhpd\t{%H1, %0|%0, %H1}
   vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
   #
   #
   #"
  [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
   (set (attr "type")
     (cond [(eq_attr "alternative" "5")
              (const_string "sselog")
            (eq_attr "alternative" "9")
              (const_string "fmov")
            (eq_attr "alternative" "10")
              (const_string "imov")
           ]
           (const_string "ssemov")))
   (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
   (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
   (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
   (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])

;; After reload, replacing the low element of a V2DF in memory with a DF
;; register is rewritten as a single DFmode store.  The pattern keeps
;; element 1 of the memory operand (it is the memory's own element 1) and
;; sets element 0 from operand 1, so the store must target byte offset 0;
;; storing at offset 8 would overwrite the element that is meant to be
;; preserved and leave the low element stale.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand" "")
        (vec_concat:V2DF
          (match_operand:DF 1 "register_operand" "")
          (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
  "operands[0] = adjust_address (operands[0], DFmode, 0);")

;; vec_merge with mask 1: element 0 of the result comes from operand 2
;; and element 1 from operand 1.  Alternatives, in order:
;;   0, 1   both sources in SSE registers (movsd / vmovsd);
;;   2, 3   operand 2 in memory (movlpd / vmovlpd load);
;;   4      memory destination tied to operand 1: store the low element of
;;          operand 2 with movlpd;
;;   5      destination tied to operand 2: shufpd with immediate 2;
;;   6, 7   operand 1 in memory: load its high half (%H1) with
;;          movhps / vmovhps;
;;   8      memory destination tied to operand 2: store operand 1 to the
;;          high-half address %H0 with movhps.
(define_insn "sse2_movsd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"   "=x,x,x,x,m,x,x,x,o")
        (vec_merge:V2DF
          (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
          (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
          (const_int 1)))]
  "TARGET_SSE2"
  "@
   movsd\t{%2, %0|%0, %2}
   vmovsd\t{%2, %1, %0|%0, %1, %2}
   movlpd\t{%2, %0|%0, %2}
   vmovlpd\t{%2, %1, %0|%0, %1, %2}
   %vmovlpd\t{%2, %0|%0, %2}
   shufpd\t{$2, %1, %0|%0, %1, 2}
   movhps\t{%H1, %0|%0, %H1}
   vmovhps\t{%H1, %2, %0|%0, %2, %H1}
   %vmovhps\t{%1, %H0|%H0, %1}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
   (set (attr "type")
     (if_then_else
       (eq_attr "alternative" "5")
       (const_string "sselog")
       (const_string "ssemov")))
   (set (attr "prefix_data16")
     (if_then_else
       (and (eq_attr "alternative" "2,4")
            (not (match_test "TARGET_AVX")))
       (const_string "1")
       (const_string "*")))
   (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
   (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])

;; Duplicate a DF value into both elements of a V2DF register.
;; Alternative 0 (non-AVX, source tied to the destination) uses unpcklpd
;; of the register with itself; alternative 1 (SSE3) uses movddup from a
;; register or memory source.
(define_insn "vec_dupv2df"
  [(set (match_operand:V2DF 0 "register_operand"     "=x,x")
        (vec_duplicate:V2DF
          (match_operand:DF 1 "nonimmediate_operand" " 0,xm")))]
  "TARGET_SSE2"
  "@
   unpcklpd\t%0, %0
   %vmovddup\t{%1, %0|%0, %1}"
  [(set_attr "isa" "noavx,sse3")
   (set_attr "type" "sselog1")
   (set_attr "prefix" "orig,maybe_vex")
   (set_attr "mode" "V2DF")])

;; Build a V2DF register from two DF values: result = { op1, op2 }.
;; Alternatives, in order:
;;   0, 1   both in SSE registers (unpcklpd / vunpcklpd);
;;   2      operand 2 is the same memory as operand 1: movddup;
;;   3, 4   operand 2 in memory: load it into the high half
;;          (movhpd / vmovhpd);
;;   5      operand 2 matches constraint "C": movsd load of operand 1;
;;   6, 7   forms enabled only without AVX (and not requiring SSE2 via
;;          the "isa" attribute): movlhps for a register operand 2,
;;          movhps for a memory operand 2.
(define_insn "*vec_concatv2df"
  [(set (match_operand:V2DF 0 "register_operand"     "=x,x,x,x,x,x,x,x")
        (vec_concat:V2DF
          (match_operand:DF 1 "nonimmediate_operand" " 0,x,m,0,x,m,0,0")
          (match_operand:DF 2 "vector_move_operand"  " x,x,1,m,m,C,x,m")))]
  "TARGET_SSE"
  "@
   unpcklpd\t{%2, %0|%0, %2}
   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
   %vmovddup\t{%1, %0|%0, %1}
   movhpd\t{%2, %0|%0, %2}
   vmovhpd\t{%2, %1, %0|%0, %1, %2}
   %vmovsd\t{%1, %0|%0, %1}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
   (set (attr "type")
     (if_then_else
       (eq_attr "alternative" "0,1,2")
       (const_string "sselog")
       (const_string "ssemov")))
   (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
   (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
   (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel integral arithmetic
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Vector integer negation, expanded as (0 - operand 1): operand 2 is
;; set up in the preparation statement as a register holding the zero
;; constant of the vector mode.
;; NOTE(review): the <mode> / <MODE> iterator attributes were
;; reconstructed from the VI_AVX2 iterator use -- confirm.
(define_expand "neg<mode>2"
  [(set (match_operand:VI_AVX2 0 "register_operand" "")
        (minus:VI_AVX2
          (match_dup 2)
          (match_operand:VI_AVX2 1 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")

;; Expander for vector integer addition and subtraction (the plusminus
;; code iterator) over the VI_AVX2 modes.  The operands are adjusted in
;; place by ix86_fixup_binary_operands_no_copy before the pattern is
;; emitted.
;; NOTE(review): the pattern name and the <CODE> / <MODE> attributes were
;; reconstructed from the iterators used in the template -- confirm.
(define_expand "<plusminus_insn><mode>3"
  [(set (match_operand:VI_AVX2 0 "register_operand" "")
        (plusminus:VI_AVX2
          (match_operand:VI_AVX2 1 "nonimmediate_operand" "")
          (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")

;; Matcher for vector integer addition and subtraction over the VI_AVX2
;; modes.  Alternative 0 is the two-operand non-AVX form with the
;; destination tied to operand 1; alternative 1 is the three-operand AVX
;; form.  The operand combination must be accepted by
;; ix86_binary_operator_ok.
;; NOTE(review): the pattern name, the <CODE> / <MODE> attributes, the
;; mnemonic attributes and the "mode" attribute value were reconstructed
;; -- confirm.
(define_insn "*<plusminus_insn><mode>3"
  [(set (match_operand:VI_AVX2 0 "register_operand" "=x,x")
        (plusminus:VI_AVX2
          (match_operand:VI_AVX2 1 "nonimmediate_operand" "0,x")
          (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "@
   p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
   vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Expander for saturating vector integer addition and subtraction (the
;; sat_plusminus code iterator) over the VI12_AVX2 modes.  The operands
;; are adjusted in place by ix86_fixup_binary_operands_no_copy before the
;; pattern is emitted.
;; NOTE(review): the pattern name and the <CODE> / <MODE> attributes were
;; reconstructed from the iterators used in the template -- confirm.
(define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
  [(set (match_operand:VI12_AVX2 0 "register_operand" "")
        (sat_plusminus:VI12_AVX2
          (match_operand:VI12_AVX2 1 "nonimmediate_operand" "")
          (match_operand:VI12_AVX2 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")

;; Matcher for saturating vector integer addition and subtraction over
;; the VI12_AVX2 modes.  Alternative 0 is the two-operand non-AVX form
;; with the destination tied to operand 1; alternative 1 is the
;; three-operand AVX form.  The operand combination must be accepted by
;; ix86_binary_operator_ok.
;; NOTE(review): the pattern name, the <CODE> / <MODE> attributes and the
;; mnemonic attributes were reconstructed -- confirm.
(define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
  [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
        (sat_plusminus:VI12_AVX2
          (match_operand:VI12_AVX2 1 "nonimmediate_operand" "0,x")
          (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "@
   p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
   vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])

;; Byte-element vector multiply (VI1_AVX2 modes).  The insn only exists
;; while pseudos can still be created; it is always split ("&& 1") into:
;; interleave each input with itself, two multiplies in MULMODE, and a
;; final permutation that gathers the even bytes of the two products.
;; NOTE(review): the pattern name, the "mode" tokens and the
;; gen__interleave_{high,low} calls look truncated -- presumably <...>
;; iterator substitutions were lost in extraction; confirm upstream.
(define_insn_and_split "mul3"
5027
  [(set (match_operand:VI1_AVX2 0 "register_operand" "")
5028
        (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "")
5029
                       (match_operand:VI1_AVX2 2 "register_operand" "")))]
5030
  "TARGET_SSE2
5031
   && can_create_pseudo_p ()"
5032
  "#"
5033
  "&& 1"
5034
  [(const_int 0)]
5035
{
5036
  /* t[0..3] hold the interleaved inputs, t[4..5] the two products.  */
  rtx t[6];
5037
  int i;
5038
  enum machine_mode mulmode = mode;
5039
 
5040
  for (i = 0; i < 6; ++i)
5041
    t[i] = gen_reg_rtx (mode);
5042
 
5043
  /* Unpack data such that we've got a source byte in each low byte of
5044
     each word.  We don't care what goes into the high byte of each word.
5045
     Rather than trying to get zero in there, most convenient is to let
5046
     it be a copy of the low byte.  */
5047
  emit_insn (gen__interleave_high (t[0], operands[1],
5048
                                                   operands[1]));
5049
  emit_insn (gen__interleave_high (t[1], operands[2],
5050
                                                   operands[2]));
5051
  emit_insn (gen__interleave_low (t[2], operands[1],
5052
                                                  operands[1]));
5053
  emit_insn (gen__interleave_low (t[3], operands[2],
5054
                                                  operands[2]));
5055
 
5056
  /* Multiply words.  The end-of-line annotations here give a picture of what
5057
     the output of that instruction looks like.  Dot means don't care; the
5058
     letters are the bytes of the result with A being the most significant.  */
5059
  emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[4]),
5060
                          gen_rtx_MULT (mulmode,        /* .A.B.C.D.E.F.G.H */
5061
                                        gen_lowpart (mulmode, t[0]),
5062
                                        gen_lowpart (mulmode, t[1]))));
5063
  emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[5]),
5064
                          gen_rtx_MULT (mulmode,        /* .I.J.K.L.M.N.O.P */
5065
                                        gen_lowpart (mulmode, t[2]),
5066
                                        gen_lowpart (mulmode, t[3]))));
5067
 
5068
  /* Extract the even bytes and merge them back together.  */
5069
  if (mode == V16QImode)
5070
    ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
5071
  else
5072
    {
5073
      /* Since avx2_interleave_{low,high}v32qi used above aren't cross-lane,
5074
         this can't be normal even extraction, but one where additionally
5075
         the second and third quarter are swapped.  That is even one insn
5076
         shorter than even extraction.  */
5077
      rtvec v = rtvec_alloc (32);
5078
      /* Selector element i is 2*i, except that positions 8..15 get +16
         and positions 16..23 get -16 (the quarter swap described above).  */
      for (i = 0; i < 32; ++i)
5079
        RTVEC_ELT (v, i)
5080
          = GEN_INT (i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0));
5081
      /* Reuse t[] as the argument array for ix86_expand_vec_perm_const:
         destination, the two product vectors, then the constant selector.  */
      t[0] = operands[0];
5082
      t[1] = t[5];
5083
      t[2] = t[4];
5084
      t[3] = gen_rtx_CONST_VECTOR (mode, v);
5085
      ix86_expand_vec_perm_const (t);
5086
    }
5087
 
5088
  /* Attach a REG_EQUAL note describing the whole sequence as a plain MULT
     of the original operands to the last emitted insn.  */
  set_unique_reg_note (get_last_insn (), REG_EQUAL,
5089
                       gen_rtx_MULT (mode, operands[1], operands[2]));
5090
  DONE;
5091
})
5092
 
5093
;; Expander for the 16-bit element vector multiply (VI2_AVX2 modes).
;; The preparation statement only calls ix86_fixup_binary_operands_no_copy
;; on the operands; the RTL template itself is then emitted.
;; NOTE(review): the pattern name and the "mode" argument look truncated
;; (presumably a stripped <mode>/<MODE> substitution) -- confirm upstream.
(define_expand "mul3"
5094
  [(set (match_operand:VI2_AVX2 0 "register_operand" "")
5095
        (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "")
5096
                       (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))]
5097
  "TARGET_SSE2"
5098
  "ix86_fixup_binary_operands_no_copy (MULT, mode, operands);")
5099
 
5100
;; pmullw / vpmullw: element-wise multiply of VI2_AVX2 vectors.
;; Operand 1 is marked commutative ("%"); alternative 0 is the legacy
;; two-operand form, alternative 1 the VEX three-operand form.
;; NOTE(review): the pattern name, the "mode" argument and the empty
;; "mode" attribute value look truncated (presumably stripped <...>
;; substitutions) -- confirm upstream.
(define_insn "*mul3"
5101
  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
5102
        (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
5103
                       (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5104
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, mode, operands)"
5105
  "@
5106
   pmullw\t{%2, %0|%0, %2}
5107
   vpmullw\t{%2, %1, %0|%0, %1, %2}"
5108
  [(set_attr "isa" "noavx,avx")
5109
   (set_attr "type" "sseimul")
5110
   (set_attr "prefix_data16" "1,*")
5111
   (set_attr "prefix" "orig,vex")
5112
   (set_attr "mode" "")])
5113
 
5114
;; Expander for the high-part multiply of VI2_AVX2 vectors: both inputs
;; are extended (any_extend), multiplied, shifted right by 16 and
;; truncated back to the element width.
;; NOTE(review): the modes after "lshiftrt:", "mult:" and "any_extend:"
;; are empty and the name has no signedness/mode part -- presumably
;; <...> substitutions were stripped in extraction; confirm upstream.
(define_expand "mul3_highpart"
5115
  [(set (match_operand:VI2_AVX2 0 "register_operand" "")
5116
        (truncate:VI2_AVX2
5117
          (lshiftrt:
5118
            (mult:
5119
              (any_extend:
5120
                (match_operand:VI2_AVX2 1 "nonimmediate_operand" ""))
5121
              (any_extend:
5122
                (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))
5123
            (const_int 16))))]
5124
  "TARGET_SSE2"
5125
  "ix86_fixup_binary_operands_no_copy (MULT, mode, operands);")
5126
 
5127
;; Insn matching the high-part multiply RTL produced by the expander
;; above; emits pmulhw (legacy form) or vpmulhw (VEX form).
;; NOTE(review): the template uses the any_extend iterator but the
;; mnemonic shown is only the "pmulhw" spelling, and the inner modes and
;; "mode" attribute are empty -- presumably <...> substitutions were
;; stripped in extraction; confirm upstream.
(define_insn "*mul3_highpart"
5128
  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
5129
        (truncate:VI2_AVX2
5130
          (lshiftrt:
5131
            (mult:
5132
              (any_extend:
5133
                (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
5134
              (any_extend:
5135
                (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
5136
            (const_int 16))))]
5137
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, mode, operands)"
5138
  "@
5139
   pmulhw\t{%2, %0|%0, %2}
5140
   vpmulhw\t{%2, %1, %0|%0, %1, %2}"
5141
  [(set_attr "isa" "noavx,avx")
5142
   (set_attr "type" "sseimul")
5143
   (set_attr "prefix_data16" "1,*")
5144
   (set_attr "prefix" "orig,vex")
5145
   (set_attr "mode" "")])
5146
 
5147
;; Expander for the AVX2 unsigned widening multiply: elements 0, 2, 4
;; and 6 of each V8SI input are zero-extended to DImode and multiplied,
;; giving a V4DI result.  The preparation statement only fixes up the
;; operands via ix86_fixup_binary_operands_no_copy.
(define_expand "avx2_umulv4siv4di3"
5148
  [(set (match_operand:V4DI 0 "register_operand" "")
5149
        (mult:V4DI
5150
          (zero_extend:V4DI
5151
            (vec_select:V4SI
5152
              (match_operand:V8SI 1 "nonimmediate_operand" "")
5153
              (parallel [(const_int 0) (const_int 2)
5154
                         (const_int 4) (const_int 6)])))
5155
          (zero_extend:V4DI
5156
            (vec_select:V4SI
5157
              (match_operand:V8SI 2 "nonimmediate_operand" "")
5158
              (parallel [(const_int 0) (const_int 2)
5159
                         (const_int 4) (const_int 6)])))))]
5160
  "TARGET_AVX2"
5161
  "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
5162
 
5163
;; vpmuludq on 256-bit vectors: unsigned widening multiply of the
;; even-numbered SImode elements (0, 2, 4, 6) of two V8SI operands,
;; producing a V4DI result.  The internal name carries the avx2_ prefix
;; so that it agrees with the avx2_umulv4siv4di3 expander and with the
;; TARGET_AVX2 condition below.  Operand 1 is marked commutative ("%").
(define_insn "*avx2_umulv4siv4di3"
  [(set (match_operand:V4DI 0 "register_operand" "=x")
        (mult:V4DI
          (zero_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 1 "nonimmediate_operand" "%x")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (zero_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))))]
  "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
  "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
5181
 
5182
;; Expander for the SSE2 unsigned widening multiply: elements 0 and 2 of
;; each V4SI input are zero-extended to DImode and multiplied, giving a
;; V2DI result.  The preparation statement only fixes up the operands.
(define_expand "sse2_umulv2siv2di3"
5183
  [(set (match_operand:V2DI 0 "register_operand" "")
5184
        (mult:V2DI
5185
          (zero_extend:V2DI
5186
            (vec_select:V2SI
5187
              (match_operand:V4SI 1 "nonimmediate_operand" "")
5188
              (parallel [(const_int 0) (const_int 2)])))
5189
          (zero_extend:V2DI
5190
            (vec_select:V2SI
5191
              (match_operand:V4SI 2 "nonimmediate_operand" "")
5192
              (parallel [(const_int 0) (const_int 2)])))))]
5193
  "TARGET_SSE2"
5194
  "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
5195
 
5196
;; pmuludq / vpmuludq on 128-bit vectors: unsigned widening multiply of
;; elements 0 and 2 of two V4SI operands into a V2DI result.
;; Alternative 0 is the legacy two-operand form (operand 1 tied to the
;; destination), alternative 1 the VEX three-operand form.
(define_insn "*sse2_umulv2siv2di3"
5197
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5198
        (mult:V2DI
5199
          (zero_extend:V2DI
5200
            (vec_select:V2SI
5201
              (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5202
              (parallel [(const_int 0) (const_int 2)])))
5203
          (zero_extend:V2DI
5204
            (vec_select:V2SI
5205
              (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5206
              (parallel [(const_int 0) (const_int 2)])))))]
5207
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5208
  "@
5209
   pmuludq\t{%2, %0|%0, %2}
5210
   vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5211
  [(set_attr "isa" "noavx,avx")
5212
   (set_attr "type" "sseimul")
5213
   (set_attr "prefix_data16" "1,*")
5214
   (set_attr "prefix" "orig,vex")
5215
   (set_attr "mode" "TI")])
5216
 
5217
;; Expander for the AVX2 signed widening multiply: elements 0, 2, 4 and
;; 6 of each V8SI input are sign-extended to DImode and multiplied,
;; giving a V4DI result.  The preparation statement only fixes up the
;; operands via ix86_fixup_binary_operands_no_copy.
(define_expand "avx2_mulv4siv4di3"
5218
  [(set (match_operand:V4DI 0 "register_operand" "")
5219
        (mult:V4DI
5220
          (sign_extend:V4DI
5221
            (vec_select:V4SI
5222
              (match_operand:V8SI 1 "nonimmediate_operand" "")
5223
              (parallel [(const_int 0) (const_int 2)
5224
                         (const_int 4) (const_int 6)])))
5225
          (sign_extend:V4DI
5226
            (vec_select:V4SI
5227
              (match_operand:V8SI 2 "nonimmediate_operand" "")
5228
              (parallel [(const_int 0) (const_int 2)
5229
                         (const_int 4) (const_int 6)])))))]
5230
  "TARGET_AVX2"
5231
  "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
5232
 
5233
;; vpmuldq on 256-bit vectors: signed widening multiply of the
;; even-numbered SImode elements (0, 2, 4, 6) of two V8SI operands,
;; producing a V4DI result.
;; Operand 1 carries the "%" commutative modifier, like the unsigned
;; AVX2 insn and the SSE4.1 signed insn in this file: the multiply is
;; commutative and the insn condition goes through
;; ix86_binary_operator_ok, so the operands may be exchanged to satisfy
;; the constraints instead of forcing a reload of operand 1.
(define_insn "*avx2_mulv4siv4di3"
  [(set (match_operand:V4DI 0 "register_operand" "=x")
        (mult:V4DI
          (sign_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 1 "nonimmediate_operand" "%x")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (sign_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))))]
  "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
  "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
5253
 
5254
;; Expander for the SSE4.1 signed widening multiply: elements 0 and 2 of
;; each V4SI input are sign-extended to DImode and multiplied, giving a
;; V2DI result.  The preparation statement only fixes up the operands.
(define_expand "sse4_1_mulv2siv2di3"
5255
  [(set (match_operand:V2DI 0 "register_operand" "")
5256
        (mult:V2DI
5257
          (sign_extend:V2DI
5258
            (vec_select:V2SI
5259
              (match_operand:V4SI 1 "nonimmediate_operand" "")
5260
              (parallel [(const_int 0) (const_int 2)])))
5261
          (sign_extend:V2DI
5262
            (vec_select:V2SI
5263
              (match_operand:V4SI 2 "nonimmediate_operand" "")
5264
              (parallel [(const_int 0) (const_int 2)])))))]
5265
  "TARGET_SSE4_1"
5266
  "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
5267
 
5268
;; pmuldq / vpmuldq on 128-bit vectors: signed widening multiply of
;; elements 0 and 2 of two V4SI operands into a V2DI result.
;; Alternative 0 is the legacy two-operand form (operand 1 tied to the
;; destination), alternative 1 the VEX three-operand form.
(define_insn "*sse4_1_mulv2siv2di3"
5269
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5270
        (mult:V2DI
5271
          (sign_extend:V2DI
5272
            (vec_select:V2SI
5273
              (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5274
              (parallel [(const_int 0) (const_int 2)])))
5275
          (sign_extend:V2DI
5276
            (vec_select:V2SI
5277
              (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5278
              (parallel [(const_int 0) (const_int 2)])))))]
5279
  "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5280
  "@
5281
   pmuldq\t{%2, %0|%0, %2}
5282
   vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5283
  [(set_attr "isa" "noavx,avx")
5284
   (set_attr "type" "sseimul")
5285
   (set_attr "prefix_data16" "1,*")
5286
   (set_attr "prefix_extra" "1")
5287
   (set_attr "prefix" "orig,vex")
5288
   (set_attr "mode" "TI")])
5289
 
5290
;; Expander for the AVX2 multiply-and-add of 16-bit elements.  The RTL is
;; the sum of two V8SI products: the sign-extended even elements
;; (0, 2, ..., 14) of the two V16HI inputs multiplied together, plus the
;; sign-extended odd elements (1, 3, ..., 15) multiplied together.
;; The preparation statement only fixes up the operands.
(define_expand "avx2_pmaddwd"
5291
  [(set (match_operand:V8SI 0 "register_operand" "")
5292
        (plus:V8SI
5293
          (mult:V8SI
5294
            (sign_extend:V8SI
5295
              (vec_select:V8HI
5296
                (match_operand:V16HI 1 "nonimmediate_operand" "")
5297
                (parallel [(const_int 0)
5298
                           (const_int 2)
5299
                           (const_int 4)
5300
                           (const_int 6)
5301
                           (const_int 8)
5302
                           (const_int 10)
5303
                           (const_int 12)
5304
                           (const_int 14)])))
5305
            (sign_extend:V8SI
5306
              (vec_select:V8HI
5307
                (match_operand:V16HI 2 "nonimmediate_operand" "")
5308
                (parallel [(const_int 0)
5309
                           (const_int 2)
5310
                           (const_int 4)
5311
                           (const_int 6)
5312
                           (const_int 8)
5313
                           (const_int 10)
5314
                           (const_int 12)
5315
                           (const_int 14)]))))
5316
          (mult:V8SI
5317
            (sign_extend:V8SI
5318
              (vec_select:V8HI (match_dup 1)
5319
                (parallel [(const_int 1)
5320
                           (const_int 3)
5321
                           (const_int 5)
5322
                           (const_int 7)
5323
                           (const_int 9)
5324
                           (const_int 11)
5325
                           (const_int 13)
5326
                           (const_int 15)])))
5327
            (sign_extend:V8SI
5328
              (vec_select:V8HI (match_dup 2)
5329
                (parallel [(const_int 1)
5330
                           (const_int 3)
5331
                           (const_int 5)
5332
                           (const_int 7)
5333
                           (const_int 9)
5334
                           (const_int 11)
5335
                           (const_int 13)
5336
                           (const_int 15)]))))))]
5337
  "TARGET_AVX2"
5338
  "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
5339
 
5340
;; Expander for the SSE2 multiply-and-add of 16-bit elements.  The RTL is
;; the sum of two V4SI products: the sign-extended even elements
;; (0, 2, 4, 6) of the two V8HI inputs multiplied together, plus the
;; sign-extended odd elements (1, 3, 5, 7) multiplied together.
;; The preparation statement only fixes up the operands.
(define_expand "sse2_pmaddwd"
5341
  [(set (match_operand:V4SI 0 "register_operand" "")
5342
        (plus:V4SI
5343
          (mult:V4SI
5344
            (sign_extend:V4SI
5345
              (vec_select:V4HI
5346
                (match_operand:V8HI 1 "nonimmediate_operand" "")
5347
                (parallel [(const_int 0)
5348
                           (const_int 2)
5349
                           (const_int 4)
5350
                           (const_int 6)])))
5351
            (sign_extend:V4SI
5352
              (vec_select:V4HI
5353
                (match_operand:V8HI 2 "nonimmediate_operand" "")
5354
                (parallel [(const_int 0)
5355
                           (const_int 2)
5356
                           (const_int 4)
5357
                           (const_int 6)]))))
5358
          (mult:V4SI
5359
            (sign_extend:V4SI
5360
              (vec_select:V4HI (match_dup 1)
5361
                (parallel [(const_int 1)
5362
                           (const_int 3)
5363
                           (const_int 5)
5364
                           (const_int 7)])))
5365
            (sign_extend:V4SI
5366
              (vec_select:V4HI (match_dup 2)
5367
                (parallel [(const_int 1)
5368
                           (const_int 3)
5369
                           (const_int 5)
5370
                           (const_int 7)]))))))]
5371
  "TARGET_SSE2"
5372
  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
5373
 
5374
;; vpmaddwd on 256-bit vectors.  Matches the same RTL shape as the
;; avx2_pmaddwd expander: products of the even V16HI elements plus
;; products of the odd elements, each sign-extended to SImode.
;; Operand 1 is marked commutative ("%"); only the VEX form exists.
(define_insn "*avx2_pmaddwd"
5375
  [(set (match_operand:V8SI 0 "register_operand" "=x")
5376
        (plus:V8SI
5377
          (mult:V8SI
5378
            (sign_extend:V8SI
5379
              (vec_select:V8HI
5380
                (match_operand:V16HI 1 "nonimmediate_operand" "%x")
5381
                (parallel [(const_int 0)
5382
                           (const_int 2)
5383
                           (const_int 4)
5384
                           (const_int 6)
5385
                           (const_int 8)
5386
                           (const_int 10)
5387
                           (const_int 12)
5388
                           (const_int 14)])))
5389
            (sign_extend:V8SI
5390
              (vec_select:V8HI
5391
                (match_operand:V16HI 2 "nonimmediate_operand" "xm")
5392
                (parallel [(const_int 0)
5393
                           (const_int 2)
5394
                           (const_int 4)
5395
                           (const_int 6)
5396
                           (const_int 8)
5397
                           (const_int 10)
5398
                           (const_int 12)
5399
                           (const_int 14)]))))
5400
          (mult:V8SI
5401
            (sign_extend:V8SI
5402
              (vec_select:V8HI (match_dup 1)
5403
                (parallel [(const_int 1)
5404
                           (const_int 3)
5405
                           (const_int 5)
5406
                           (const_int 7)
5407
                           (const_int 9)
5408
                           (const_int 11)
5409
                           (const_int 13)
5410
                           (const_int 15)])))
5411
            (sign_extend:V8SI
5412
              (vec_select:V8HI (match_dup 2)
5413
                (parallel [(const_int 1)
5414
                           (const_int 3)
5415
                           (const_int 5)
5416
                           (const_int 7)
5417
                           (const_int 9)
5418
                           (const_int 11)
5419
                           (const_int 13)
5420
                           (const_int 15)]))))))]
5421
  "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
5422
  "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5423
  [(set_attr "type" "sseiadd")
5424
   (set_attr "prefix" "vex")
5425
   (set_attr "mode" "OI")])
5426
 
5427
;; pmaddwd / vpmaddwd on 128-bit vectors.  Matches the same RTL shape as
;; the sse2_pmaddwd expander: products of the even V8HI elements plus
;; products of the odd elements, each sign-extended to SImode.
;; Alternative 0 is the legacy two-operand form, alternative 1 the VEX
;; three-operand form.
(define_insn "*sse2_pmaddwd"
5428
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
5429
        (plus:V4SI
5430
          (mult:V4SI
5431
            (sign_extend:V4SI
5432
              (vec_select:V4HI
5433
                (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5434
                (parallel [(const_int 0)
5435
                           (const_int 2)
5436
                           (const_int 4)
5437
                           (const_int 6)])))
5438
            (sign_extend:V4SI
5439
              (vec_select:V4HI
5440
                (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
5441
                (parallel [(const_int 0)
5442
                           (const_int 2)
5443
                           (const_int 4)
5444
                           (const_int 6)]))))
5445
          (mult:V4SI
5446
            (sign_extend:V4SI
5447
              (vec_select:V4HI (match_dup 1)
5448
                (parallel [(const_int 1)
5449
                           (const_int 3)
5450
                           (const_int 5)
5451
                           (const_int 7)])))
5452
            (sign_extend:V4SI
5453
              (vec_select:V4HI (match_dup 2)
5454
                (parallel [(const_int 1)
5455
                           (const_int 3)
5456
                           (const_int 5)
5457
                           (const_int 7)]))))))]
5458
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5459
  "@
5460
   pmaddwd\t{%2, %0|%0, %2}
5461
   vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5462
  [(set_attr "isa" "noavx,avx")
5463
   (set_attr "type" "sseiadd")
5464
   (set_attr "atom_unit" "simul")
5465
   (set_attr "prefix_data16" "1,*")
5466
   (set_attr "prefix" "orig,vex")
5467
   (set_attr "mode" "TI")])
5468
 
5469
;; Expander for the 32-bit element vector multiply (VI4_AVX2 modes).
;; It is enabled for any SSE2 target, but the operands are only passed
;; through ix86_fixup_binary_operands_no_copy when SSE4.1 or AVX is
;; available; otherwise the template is emitted unchanged.
;; NOTE(review): the pattern name and the "mode" argument look truncated
;; (presumably stripped <mode>/<MODE> substitutions) -- confirm upstream.
(define_expand "mul3"
5470
  [(set (match_operand:VI4_AVX2 0 "register_operand" "")
5471
        (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "register_operand" "")
5472
                       (match_operand:VI4_AVX2 2 "register_operand" "")))]
5473
  "TARGET_SSE2"
5474
{
5475
  if (TARGET_SSE4_1 || TARGET_AVX)
5476
    ix86_fixup_binary_operands_no_copy (MULT, mode, operands);
5477
})
5478
 
5479
;; pmulld / vpmulld: element-wise multiply of VI4_AVX2 vectors, available
;; from SSE4.1 on.  Alternative 0 is the legacy two-operand form,
;; alternative 1 the VEX three-operand form.
;; NOTE(review): the pattern name, the "mode" argument and the empty
;; "mode" attribute value look truncated (presumably stripped <...>
;; substitutions) -- confirm upstream.
(define_insn "*_mul3"
5480
  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,x")
5481
        (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,x")
5482
                       (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5483
  "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, mode, operands)"
5484
  "@
5485
   pmulld\t{%2, %0|%0, %2}
5486
   vpmulld\t{%2, %1, %0|%0, %1, %2}"
5487
  [(set_attr "isa" "noavx,avx")
5488
   (set_attr "type" "sseimul")
5489
   (set_attr "prefix_extra" "1")
5490
   (set_attr "prefix" "orig,vex")
5491
   (set_attr "mode" "")])
5492
 
5493
;; V4SI multiply for targets that have SSE2 but neither SSE4.1 nor AVX
;; (see the insn condition).  The insn is always split ("&& 1") into two
;; sse2_umulv2siv2di3 multiplies -- one on the inputs as given, one on the
;; inputs shifted right by 32 bits -- followed by shuffles and an
;; interleave that merge the two product vectors into the destination.
(define_insn_and_split "*sse2_mulv4si3"
5494
  [(set (match_operand:V4SI 0 "register_operand" "")
5495
        (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5496
                   (match_operand:V4SI 2 "register_operand" "")))]
5497
  "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5498
   && can_create_pseudo_p ()"
5499
  "#"
5500
  "&& 1"
5501
  [(const_int 0)]
5502
{
5503
  rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5504
  rtx op0, op1, op2;
5505
 
5506
  op0 = operands[0];
5507
  op1 = operands[1];
5508
  op2 = operands[2];
5509
  t1 = gen_reg_rtx (V4SImode);
5510
  t2 = gen_reg_rtx (V4SImode);
5511
  t3 = gen_reg_rtx (V4SImode);
5512
  t4 = gen_reg_rtx (V4SImode);
5513
  t5 = gen_reg_rtx (V4SImode);
5514
  t6 = gen_reg_rtx (V4SImode);
5515
  thirtytwo = GEN_INT (32);
5516
 
5517
  /* Multiply elements 2 and 0.  */
5518
  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5519
                                     op1, op2));
5520
 
5521
  /* Shift both input vectors down one element, so that elements 3
5522
     and 1 are now in the slots for elements 2 and 0.  For K8, at
5523
     least, this is faster than using a shuffle.  */
5524
  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5525
                                 gen_lowpart (V1TImode, op1),
5526
                                 thirtytwo));
5527
  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5528
                                 gen_lowpart (V1TImode, op2),
5529
                                 thirtytwo));
5530
  /* Multiply elements 3 and 1.  */
5531
  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5532
                                     t2, t3));
5533
 
5534
  /* Move the results in element 2 down to element 1; we don't care
5535
     what goes in elements 2 and 3.  */
5536
  emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5537
                                const0_rtx, const0_rtx));
5538
  emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5539
                                const0_rtx, const0_rtx));
5540
 
5541
  /* Merge the parts back together.  */
5542
  emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
5543
 
5544
  /* Describe the whole sequence as a single V4SI MULT on the last insn.  */
  set_unique_reg_note (get_last_insn (), REG_EQUAL,
5545
                       gen_rtx_MULT (V4SImode, operands[1], operands[2]));
5546
  DONE;
5547
})
5548
 
5549
;; 64-bit element vector multiply (VI8_AVX2 modes).  The insn is always
;; split ("&& 1").  With XOP and V2DImode a pshufd / mulv4si3 / phadddq /
;; shift / pmacsdql sequence is used; otherwise the product is assembled
;; from three widening multiplies, two shifts and two additions.
;; NOTE(review): the pattern name, the "mode" tokens and the
;; gen__umulvsi3 / gen_lshr3 / gen_ashl3 / gen_add3 names look truncated --
;; presumably <...> iterator substitutions were lost in extraction;
;; confirm against the upstream file.
(define_insn_and_split "mul3"
5550
  [(set (match_operand:VI8_AVX2 0 "register_operand" "")
5551
        (mult:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand" "")
5552
                       (match_operand:VI8_AVX2 2 "register_operand" "")))]
5553
  "TARGET_SSE2
5554
   && can_create_pseudo_p ()"
5555
  "#"
5556
  "&& 1"
5557
  [(const_int 0)]
5558
{
5559
  rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5560
  rtx op0, op1, op2;
5561
 
5562
  op0 = operands[0];
5563
  op1 = operands[1];
5564
  op2 = operands[2];
5565
 
5566
  if (TARGET_XOP && mode == V2DImode)
5567
    {
5568
      /* op1: A,B,C,D, op2: E,F,G,H */
5569
      op1 = gen_lowpart (V4SImode, op1);
5570
      op2 = gen_lowpart (V4SImode, op2);
5571
 
5572
      t1 = gen_reg_rtx (V4SImode);
5573
      t2 = gen_reg_rtx (V4SImode);
5574
      t3 = gen_reg_rtx (V2DImode);
5575
      t4 = gen_reg_rtx (V2DImode);
5576
 
5577
      /* t1: B,A,D,C */
5578
      emit_insn (gen_sse2_pshufd_1 (t1, op1,
5579
                                    GEN_INT (1),
5580
                                    GEN_INT (0),
5581
                                    GEN_INT (3),
5582
                                    GEN_INT (2)));
5583
 
5584
      /* t2: (B*E),(A*F),(D*G),(C*H) */
5585
      emit_insn (gen_mulv4si3 (t2, t1, op2));
5586
 
5587
      /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5588
      emit_insn (gen_xop_phadddq (t3, t2));
5589
 
5590
      /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5591
      emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5592
 
5593
      /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5594
      emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5595
    }
5596
  else
5597
    {
5598
      t1 = gen_reg_rtx (mode);
5599
      t2 = gen_reg_rtx (mode);
5600
      t3 = gen_reg_rtx (mode);
5601
      t4 = gen_reg_rtx (mode);
5602
      t5 = gen_reg_rtx (mode);
5603
      t6 = gen_reg_rtx (mode);
5604
      thirtytwo = GEN_INT (32);
5605
 
5606
      /* Multiply low parts.  */
5607
      emit_insn (gen__umulvsi3
5608
                  (t1, gen_lowpart (mode, op1),
5609
                   gen_lowpart (mode, op2)));
5610
 
5611
      /* Shift input vectors right 32 bits so we can multiply high parts.  */
5612
      emit_insn (gen_lshr3 (t2, op1, thirtytwo));
5613
      emit_insn (gen_lshr3 (t3, op2, thirtytwo));
5614
 
5615
      /* Multiply high parts by low parts.  */
5616
      /* t4 pairs op1 with the shifted op2 (t3); t5 pairs op2 with the
         shifted op1 (t2).  */
      emit_insn (gen__umulvsi3
5617
                  (t4, gen_lowpart (mode, op1),
5618
                   gen_lowpart (mode, t3)));
5619
      emit_insn (gen__umulvsi3
5620
                  (t5, gen_lowpart (mode, op2),
5621
                   gen_lowpart (mode, t2)));
5622
 
5623
      /* Shift them back.  */
5624
      emit_insn (gen_ashl3 (t4, t4, thirtytwo));
5625
      emit_insn (gen_ashl3 (t5, t5, thirtytwo));
5626
 
5627
      /* Add the three parts together.  */
5628
      emit_insn (gen_add3 (t6, t1, t4));
5629
      emit_insn (gen_add3 (op0, t6, t5));
5630
    }
5631
 
5632
  /* Describe the whole sequence as a single MULT on the last insn.  */
  set_unique_reg_note (get_last_insn (), REG_EQUAL,
5633
                       gen_rtx_MULT (mode, operands[1], operands[2]));
5634
  DONE;
5635
})
5636
 
5637
;; Widening multiply, high half, for VI2_AVX2 inputs.  Computes a plain
;; multiply (t1) and a high-part multiply (t2) of the operands and
;; interleaves the high halves of t1 and t2 into the destination.
;; NOTE(review): the pattern name, the operand 0 mode, the any_extend
;; mode and the gen_mul3 / gen_mul3_highpart / gen_vec_interleave_high
;; names look truncated -- presumably <...> substitutions were stripped
;; in extraction; confirm upstream.
(define_expand "vec_widen_mult_hi_"
5638
  [(match_operand: 0 "register_operand" "")
5639
   (any_extend:
5640
     (match_operand:VI2_AVX2 1 "register_operand" ""))
5641
   (match_operand:VI2_AVX2 2 "register_operand" "")]
5642
  "TARGET_SSE2"
5643
{
5644
  rtx op1, op2, t1, t2, dest;
5645
 
5646
  op1 = operands[1];
5647
  op2 = operands[2];
5648
  t1 = gen_reg_rtx (mode);
5649
  t2 = gen_reg_rtx (mode);
5650
  /* View the destination in the mode used by the interleave below.  */
  dest = gen_lowpart (mode, operands[0]);
5651
 
5652
  emit_insn (gen_mul3 (t1, op1, op2));
5653
  emit_insn (gen_mul3_highpart (t2, op1, op2));
5654
  emit_insn (gen_vec_interleave_high (dest, t1, t2));
5655
  DONE;
5656
})
5657
 
5658
;; Widening multiply, low half, for VI2_AVX2 inputs.  Computes a plain
;; multiply (t1) and a high-part multiply (t2) of the operands and
;; interleaves the low halves of t1 and t2 into the destination.
;; NOTE(review): the pattern name, the operand 0 mode, the any_extend
;; mode and the gen_mul3 / gen_mul3_highpart / gen_vec_interleave_low
;; names look truncated -- presumably <...> substitutions were stripped
;; in extraction; confirm upstream.
(define_expand "vec_widen_mult_lo_"
5659
  [(match_operand: 0 "register_operand" "")
5660
   (any_extend:
5661
     (match_operand:VI2_AVX2 1 "register_operand" ""))
5662
   (match_operand:VI2_AVX2 2 "register_operand" "")]
5663
  "TARGET_SSE2"
5664
{
5665
  rtx op1, op2, t1, t2, dest;
5666
 
5667
  op1 = operands[1];
5668
  op2 = operands[2];
5669
  t1 = gen_reg_rtx (mode);
5670
  t2 = gen_reg_rtx (mode);
5671
  /* View the destination in the mode used by the interleave below.  */
  dest = gen_lowpart (mode, operands[0]);
5672
 
5673
  emit_insn (gen_mul3 (t1, op1, op2));
5674
  emit_insn (gen_mul3_highpart (t2, op1, op2));
5675
  emit_insn (gen_vec_interleave_low (dest, t1, t2));
5676
  DONE;
5677
})
5678
 
5679
;; AVX2 widening multiply, high half, of two V8SI vectors into V4DI.
;; Both inputs are first permuted as V4DI with selectors 0,2,1,3, then
;; shuffled with a pshufd immediate whose 2-bit fields are 2,2,3,3, and
;; finally fed to the even-element widening multiply.
;; NOTE(review): the template uses the any_extend iterator but the name
;; and the multiply generator carry no signedness part -- presumably
;; <...> substitutions were stripped in extraction; confirm upstream.
(define_expand "vec_widen_mult_hi_v8si"
5680
  [(match_operand:V4DI 0 "register_operand" "")
5681
   (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
5682
   (match_operand:V8SI 2 "nonimmediate_operand" "")]
5683
  "TARGET_AVX2"
5684
{
5685
  rtx t1, t2, t3, t4;
5686
 
5687
  t1 = gen_reg_rtx (V4DImode);
5688
  t2 = gen_reg_rtx (V4DImode);
5689
  t3 = gen_reg_rtx (V8SImode);
5690
  t4 = gen_reg_rtx (V8SImode);
5691
  emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, operands[1]),
5692
                                  const0_rtx, const2_rtx,
5693
                                  const1_rtx, GEN_INT (3)));
5694
  emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, operands[2]),
5695
                                  const0_rtx, const2_rtx,
5696
                                  const1_rtx, GEN_INT (3)));
5697
  /* pshufd immediate: 2-bit selector fields, low to high, are 2,2,3,3.  */
  emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1),
5698
                                GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6))));
5699
  emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2),
5700
                                GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6))));
5701
  emit_insn (gen_avx2_mulv4siv4di3 (operands[0], t3, t4));
5702
  DONE;
5703
})
5704
 
5705
;; AVX2 widening multiply, low half, of two V8SI vectors into V4DI.
;; Both inputs are first permuted as V4DI with selectors 0,2,1,3, then
;; shuffled with a pshufd immediate whose 2-bit fields are 0,0,1,1, and
;; finally fed to the even-element widening multiply.
;; NOTE(review): the template uses the any_extend iterator but the name
;; and the multiply generator carry no signedness part -- presumably
;; <...> substitutions were stripped in extraction; confirm upstream.
(define_expand "vec_widen_mult_lo_v8si"
5706
  [(match_operand:V4DI 0 "register_operand" "")
5707
   (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
5708
   (match_operand:V8SI 2 "nonimmediate_operand" "")]
5709
  "TARGET_AVX2"
5710
{
5711
  rtx t1, t2, t3, t4;
5712
 
5713
  t1 = gen_reg_rtx (V4DImode);
5714
  t2 = gen_reg_rtx (V4DImode);
5715
  t3 = gen_reg_rtx (V8SImode);
5716
  t4 = gen_reg_rtx (V8SImode);
5717
  emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, operands[1]),
5718
                                  const0_rtx, const2_rtx,
5719
                                  const1_rtx, GEN_INT (3)));
5720
  emit_insn (gen_avx2_permv4di_1 (t2,  gen_lowpart (V4DImode, operands[2]),
5721
                                  const0_rtx, const2_rtx,
5722
                                  const1_rtx, GEN_INT (3)));
5723
  /* pshufd immediate: 2-bit selector fields, low to high, are 0,0,1,1.  */
  emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1),
5724
                                GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6))));
5725
  emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2),
5726
                                GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6))));
5727
  emit_insn (gen_avx2_mulv4siv4di3 (operands[0], t3, t4));
5728
  DONE;
5729
})
5730
 
5731
;; Signed widening multiply, high half, of two V4SI vectors into V2DI
;; (requires SSE4.1).  With XOP both inputs are shuffled with selectors
;; 0,2,1,3 and passed to xop_mulv2div2di3_high.  Otherwise each input is
;; interleaved (high) with itself and the results are multiplied with
;; sse4_1_mulv2siv2di3, which uses elements 0 and 2 of its inputs.
(define_expand "vec_widen_smult_hi_v4si"
5732
  [(match_operand:V2DI 0 "register_operand" "")
5733
   (match_operand:V4SI 1 "register_operand" "")
5734
   (match_operand:V4SI 2 "register_operand" "")]
5735
  "TARGET_SSE4_1"
5736
{
5737
  rtx op1, op2, t1, t2;
5738
 
5739
  op1 = operands[1];
5740
  op2 = operands[2];
5741
  t1 = gen_reg_rtx (V4SImode);
5742
  t2 = gen_reg_rtx (V4SImode);
5743
 
5744
  if (TARGET_XOP)
5745
    {
5746
      emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
5747
                                    GEN_INT (1), GEN_INT (3)));
5748
      emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
5749
                                    GEN_INT (1), GEN_INT (3)));
5750
      emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
5751
      DONE;
5752
    }
5753
 
5754
  /* Non-XOP path.  */
  emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5755
  emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5756
  emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
5757
  DONE;
5758
})
5759
 
5760
;; Signed widening multiply, low half, of two V4SI vectors into V2DI
;; (requires SSE4.1).  With XOP both inputs are shuffled with selectors
;; 0,2,1,3 and passed to xop_mulv2div2di3_low.  Otherwise each input is
;; interleaved (low) with itself and the results are multiplied with
;; sse4_1_mulv2siv2di3, which uses elements 0 and 2 of its inputs.
(define_expand "vec_widen_smult_lo_v4si"
5761
  [(match_operand:V2DI 0 "register_operand" "")
5762
   (match_operand:V4SI 1 "register_operand" "")
5763
   (match_operand:V4SI 2 "register_operand" "")]
5764
  "TARGET_SSE4_1"
5765
{
5766
  rtx op1, op2, t1, t2;
5767
 
5768
  op1 = operands[1];
5769
  op2 = operands[2];
5770
  t1 = gen_reg_rtx (V4SImode);
5771
  t2 = gen_reg_rtx (V4SImode);
5772
 
5773
  if (TARGET_XOP)
5774
    {
5775
      emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
5776
                                    GEN_INT (1), GEN_INT (3)));
5777
      emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
5778
                                    GEN_INT (1), GEN_INT (3)));
5779
      emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
5780
      DONE;
5781
    }
5782
 
5783
  /* Non-XOP path.  */
  emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5784
  emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5785
  emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
5786
  DONE;
5787
})
5788
 
5789
;; Unsigned widening multiply, high half, of two V4SI vectors into V2DI.
;; Each input is interleaved (high) with itself, and the results are
;; multiplied with sse2_umulv2siv2di3, which uses elements 0 and 2 of
;; its inputs.
(define_expand "vec_widen_umult_hi_v4si"
5790
  [(match_operand:V2DI 0 "register_operand" "")
5791
   (match_operand:V4SI 1 "register_operand" "")
5792
   (match_operand:V4SI 2 "register_operand" "")]
5793
  "TARGET_SSE2"
5794
{
5795
  rtx op1, op2, t1, t2;
5796
 
5797
  op1 = operands[1];
5798
  op2 = operands[2];
5799
  t1 = gen_reg_rtx (V4SImode);
5800
  t2 = gen_reg_rtx (V4SImode);
5801
 
5802
  emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5803
  emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5804
  emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
5805
  DONE;
5806
})
5807
 
5808
;; Unsigned widening multiply, low half, of two V4SI vectors into V2DI.
;; Each input is interleaved (low) with itself, and the results are
;; multiplied with sse2_umulv2siv2di3, which uses elements 0 and 2 of
;; its inputs.
(define_expand "vec_widen_umult_lo_v4si"
5809
  [(match_operand:V2DI 0 "register_operand" "")
5810
   (match_operand:V4SI 1 "register_operand" "")
5811
   (match_operand:V4SI 2 "register_operand" "")]
5812
  "TARGET_SSE2"
5813
{
5814
  rtx op1, op2, t1, t2;
5815
 
5816
  op1 = operands[1];
5817
  op2 = operands[2];
5818
  t1 = gen_reg_rtx (V4SImode);
5819
  t2 = gen_reg_rtx (V4SImode);
5820
 
5821
  emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5822
  emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5823
  emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
5824
  DONE;
5825
})
5826
 
5827
;; Signed dot product of 16-bit element vectors with a wider accumulator:
;; operand 0 = pmaddwd (operand 1, operand 2) + operand 3.
(define_expand "sdot_prod<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")
   (match_operand:VI2_AVX2 2 "register_operand" "")
   (match_operand:<sseunpackmode> 3 "register_operand" "")]
  "TARGET_SSE2"
{
  /* T holds the pmaddwd result before it is added to the accumulator.  */
  rtx t = gen_reg_rtx (<sseunpackmode>mode);
  emit_insn (gen_<sse2_avx2>_pmaddwd (t, operands[1], operands[2]));
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_PLUS (<sseunpackmode>mode,
                                        operands[3], t)));
  DONE;
})
5841
 
5842
;; Maps an extension code to the ISA prefix of the matching widening-multiply
;; pattern: zero_extend -> "sse2", sign_extend -> "sse4_1".
(define_code_attr sse2_sse4_1
   [(zero_extend "sse2") (sign_extend "sse4_1")])
5844
 
5845
;; Signed/unsigned V4SI dot product accumulated into V2DI.  The unsigned
;; form needs SSE2, the signed form SSE4.1 (see the condition below).
;; Two widening multiplies are emitted: one on the inputs as given, and one
;; on the inputs shifted right by 32 bits as a whole 128-bit value.
(define_expand "<s>dot_prodv4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (any_extend:V2DI (match_operand:V4SI 1 "register_operand" ""))
   (match_operand:V4SI 2 "register_operand" "")
   (match_operand:V2DI 3 "register_operand" "")]
  "<CODE> == ZERO_EXTEND ? TARGET_SSE2 : TARGET_SSE4_1"
{
  rtx t1, t2, t3, t4;

  /* First product, plus the incoming accumulator.  */
  t1 = gen_reg_rtx (V2DImode);
  emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t1, operands[1], operands[2]));
  emit_insn (gen_addv2di3 (t1, t1, operands[3]));

  /* Shift both inputs right by one 32-bit element (as V1TI).  */
  t2 = gen_reg_rtx (V4SImode);
  t3 = gen_reg_rtx (V4SImode);
  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
                                 gen_lowpart (V1TImode, operands[1]),
                                 GEN_INT (32)));
  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
                                 gen_lowpart (V1TImode, operands[2]),
                                 GEN_INT (32)));

  /* Second product, on the shifted inputs.  */
  t4 = gen_reg_rtx (V2DImode);
  emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t4, t2, t3));

  emit_insn (gen_addv2di3 (operands[0], t1, t4));
  DONE;
})
5873
 
5874
;; AVX2 counterpart of the V4SI dot product: V8SI inputs, V4DI accumulator.
;; Same two-multiply scheme, with the 32-bit shift applied in V2TI mode.
(define_expand "<s>dot_prodv8si"
  [(match_operand:V4DI 0 "register_operand" "")
   (any_extend:V4DI (match_operand:V8SI 1 "register_operand" ""))
   (match_operand:V8SI 2 "register_operand" "")
   (match_operand:V4DI 3 "register_operand" "")]
  "TARGET_AVX2"
{
  rtx t1, t2, t3, t4;

  /* First product, plus the incoming accumulator.  */
  t1 = gen_reg_rtx (V4DImode);
  emit_insn (gen_avx2_<u>mulv4siv4di3 (t1, operands[1], operands[2]));
  emit_insn (gen_addv4di3 (t1, t1, operands[3]));

  /* Shift both inputs right by 32 bits (as V2TI).  */
  t2 = gen_reg_rtx (V8SImode);
  t3 = gen_reg_rtx (V8SImode);
  emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t2),
                                 gen_lowpart (V2TImode, operands[1]),
                                 GEN_INT (32)));
  emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t3),
                                 gen_lowpart (V2TImode, operands[2]),
                                 GEN_INT (32)));

  /* Second product, on the shifted inputs.  */
  t4 = gen_reg_rtx (V4DImode);
  emit_insn (gen_avx2_<u>mulv4siv4di3 (t4, t2, t3));

  emit_insn (gen_addv4di3 (operands[0], t1, t4));
  DONE;
})
5902
 
5903
;; Vector arithmetic shift right by a scalar count (register or immediate).
;; Alternative 0 is the two-operand SSE form, alternative 1 the VEX form.
(define_insn "ashr<mode>3"
  [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
        (ashiftrt:VI24_AVX2
          (match_operand:VI24_AVX2 1 "register_operand" "0,x")
          (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
  "TARGET_SSE2"
  "@
   psra<ssemodesuffix>\t{%2, %0|%0, %2}
   vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseishft")
   ;; An immediate byte is encoded only when the count is a const_int.
   (set (attr "length_immediate")
     (if_then_else (match_operand 2 "const_int_operand" "")
       (const_string "1")
       (const_string "0")))
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
5921
 
5922
;; Vector logical shifts (any_lshift: left and logical right) by a scalar
;; count.  Alternative 0 is the two-operand SSE form, alternative 1 the VEX
;; form.
(define_insn "<shift_insn><mode>3"
  [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
        (any_lshift:VI248_AVX2
          (match_operand:VI248_AVX2 1 "register_operand" "0,x")
          (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
  "TARGET_SSE2"
  "@
   p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
   vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseishft")
   ;; An immediate byte is encoded only when the count is a const_int.
   (set (attr "length_immediate")
     (if_then_else (match_operand 2 "const_int_operand" "")
       (const_string "1")
       (const_string "0")))
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
5940
 
5941
;; Whole-vector left shift of a 128-bit integer vector by a bit count that is
;; a multiple of 8.  Both operands are re-viewed as V1TI so the V1TI shift
;; pattern matches.
(define_expand "vec_shl_<mode>"
  [(set (match_operand:VI_128 0 "register_operand" "")
        (ashift:V1TI
         (match_operand:VI_128 1 "register_operand" "")
         (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
  "TARGET_SSE2"
{
  operands[0] = gen_lowpart (V1TImode, operands[0]);
  operands[1] = gen_lowpart (V1TImode, operands[1]);
})
5951
 
5952
;; Full-width left shift (pslldq / vpslldq).  Operand 2 is a bit count that
;; is a multiple of 8; it is divided by 8 at output time because the
;; instruction is emitted with a byte count.
(define_insn "<sse2_avx2>_ashl<mode>3"
  [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
        (ashift:VIMAX_AVX2
         (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
         (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
  "TARGET_SSE2"
{
  /* Convert the bit count to the byte count printed in the template.  */
  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);

  switch (which_alternative)
    {
    case 0:
      return "pslldq\t{%2, %0|%0, %2}";
    case 1:
      return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseishft")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
5977
 
5978
;; Whole-vector logical right shift of a 128-bit integer vector by a bit
;; count that is a multiple of 8.  Both operands are re-viewed as V1TI so
;; the V1TI shift pattern matches.
(define_expand "vec_shr_<mode>"
  [(set (match_operand:VI_128 0 "register_operand" "")
        (lshiftrt:V1TI
         (match_operand:VI_128 1 "register_operand" "")
         (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
  "TARGET_SSE2"
{
  operands[0] = gen_lowpart (V1TImode, operands[0]);
  operands[1] = gen_lowpart (V1TImode, operands[1]);
})
5988
 
5989
;; Full-width logical right shift (psrldq / vpsrldq).  Operand 2 is a bit
;; count that is a multiple of 8; it is divided by 8 at output time because
;; the instruction is emitted with a byte count.
(define_insn "<sse2_avx2>_lshr<mode>3"
  [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
        (lshiftrt:VIMAX_AVX2
         (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
         (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
  "TARGET_SSE2"
{
  /* Convert the bit count to the byte count printed in the template.  */
  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);

  switch (which_alternative)
    {
    case 0:
      return "psrldq\t{%2, %0|%0, %2}";
    case 1:
      return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseishft")
   (set_attr "length_immediate" "1")
   (set_attr "atom_unit" "sishuf")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
6015
 
6016
 
6017
;; 256-bit integer max/min (signed and unsigned) for 8/16/32-bit elements.
;; Only operand fix-up is needed; the RTL template is emitted as written.
(define_expand "<code><mode>3"
  [(set (match_operand:VI124_256 0 "register_operand" "")
        (maxmin:VI124_256
          (match_operand:VI124_256 1 "nonimmediate_operand" "")
          (match_operand:VI124_256 2 "nonimmediate_operand" "")))]
  "TARGET_AVX2"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
6024
 
6025
;; AVX2 256-bit integer max/min instruction.  Operand 1 is marked
;; commutative ("%") so either input may be the memory operand.
(define_insn "*avx2_<code><mode>3"
  [(set (match_operand:VI124_256 0 "register_operand" "=x")
        (maxmin:VI124_256
          (match_operand:VI124_256 1 "nonimmediate_operand" "%x")
          (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
6036
 
6037
;; 64-bit element max/min, expanded as a compare-and-select through
;; ix86_expand_int_vcond:
;;   max:  op1 > op2 ? op1 : op2
;;   min:  op1 > op2 ? op2 : op1
;; using GTU for the unsigned codes and GT for the signed ones.
(define_expand "<code><mode>3"
  [(set (match_operand:VI8_AVX2 0 "register_operand" "")
        (maxmin:VI8_AVX2
          (match_operand:VI8_AVX2 1 "register_operand" "")
          (match_operand:VI8_AVX2 2 "register_operand" "")))]
  "TARGET_SSE4_2"
{
  enum rtx_code code;
  rtx xops[6];
  bool ok;

  xops[0] = operands[0];

  /* xops[1]/xops[2] are the values selected when the comparison is
     true/false; min simply swaps them.  */
  if (<CODE> == SMAX || <CODE> == UMAX)
    {
      xops[1] = operands[1];
      xops[2] = operands[2];
    }
  else
    {
      xops[1] = operands[2];
      xops[2] = operands[1];
    }

  code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;

  xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
  xops[4] = operands[1];
  xops[5] = operands[2];

  ok = ix86_expand_int_vcond (xops);
  gcc_assert (ok);
  DONE;
})
6071
 
6072
;; 128-bit signed max/min.  With SSE4.1, or for V8HI, the RTL template is
;; emitted after operand fix-up; otherwise the operation is expanded as a
;; signed compare-and-select through ix86_expand_int_vcond.
(define_expand "<code><mode>3"
  [(set (match_operand:VI124_128 0 "register_operand" "")
        (smaxmin:VI124_128
          (match_operand:VI124_128 1 "nonimmediate_operand" "")
          (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
    ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
  else
    {
      rtx xops[6];
      bool ok;

      xops[0] = operands[0];
      operands[1] = force_reg (<MODE>mode, operands[1]);
      operands[2] = force_reg (<MODE>mode, operands[2]);

      /* Values selected when op1 > op2 is true/false; min swaps them.  */
      if (<CODE> == SMAX)
        {
          xops[1] = operands[1];
          xops[2] = operands[2];
        }
      else
        {
          xops[1] = operands[2];
          xops[2] = operands[1];
        }

      xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
      xops[4] = operands[1];
      xops[5] = operands[2];

      ok = ix86_expand_int_vcond (xops);
      gcc_assert (ok);
      DONE;
    }
})
6110
 
6111
;; SSE4.1 signed max/min on 8-bit and 32-bit elements (VI14_128).
;; Alternative 0 is the two-operand SSE form, alternative 1 the VEX form.
(define_insn "*sse4_1_<code><mode>3"
  [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
        (smaxmin:VI14_128
          (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
          (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "@
   p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
   vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
6125
 
6126
;; SSE2 signed max/min on V8HI (word elements, hence the fixed "w" suffix).
;; Alternative 0 is the two-operand SSE form, alternative 1 the VEX form.
(define_insn "*<code>v8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (smaxmin:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
          (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
  "@
   p<maxmin_int>w\t{%2, %0|%0, %2}
   vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "*,1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
6141
 
6142
;; 128-bit unsigned max/min.  Three strategies:
;;  - SSE4.1, or V16QI: emit the RTL template after operand fix-up;
;;  - UMAX on V8HI: unsigned-saturating subtract followed by an add,
;;    i.e. (op1 -us op2) + op2;
;;  - otherwise: unsigned compare-and-select through ix86_expand_int_vcond.
(define_expand "<code><mode>3"
  [(set (match_operand:VI124_128 0 "register_operand" "")
        (umaxmin:VI124_128
          (match_operand:VI124_128 1 "nonimmediate_operand" "")
          (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
    ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
  else if (<CODE> == UMAX && <MODE>mode == V8HImode)
    {
      rtx op0 = operands[0], op2 = operands[2], op3 = op0;
      operands[1] = force_reg (<MODE>mode, operands[1]);
      /* The intermediate must not overwrite op2, which is read again by
         the add; use a fresh register when the destination aliases it.  */
      if (rtx_equal_p (op3, op2))
        op3 = gen_reg_rtx (V8HImode);
      emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
      emit_insn (gen_addv8hi3 (op0, op3, op2));
      DONE;
    }
  else
    {
      rtx xops[6];
      bool ok;

      operands[1] = force_reg (<MODE>mode, operands[1]);
      operands[2] = force_reg (<MODE>mode, operands[2]);

      xops[0] = operands[0];

      /* Values selected when op1 >u op2 is true/false; min swaps them.  */
      if (<CODE> == UMAX)
        {
          xops[1] = operands[1];
          xops[2] = operands[2];
        }
      else
        {
          xops[1] = operands[2];
          xops[2] = operands[1];
        }

      xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
      xops[4] = operands[1];
      xops[5] = operands[2];

      ok = ix86_expand_int_vcond (xops);
      gcc_assert (ok);
      DONE;
    }
})
6191
 
6192
;; SSE4.1 unsigned max/min on 16-bit and 32-bit elements (VI24_128).
;; Alternative 0 is the two-operand SSE form, alternative 1 the VEX form.
(define_insn "*sse4_1_<code><mode>3"
  [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
        (umaxmin:VI24_128
          (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
          (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "@
   p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
   vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
6206
 
6207
;; SSE2 unsigned max/min on V16QI (byte elements, hence the fixed "b"
;; suffix).  Alternative 0 is the SSE form, alternative 1 the VEX form.
(define_insn "*<code>v16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
        (umaxmin:V16QI
          (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
          (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
  "@
   p<maxmin_int>b\t{%2, %0|%0, %2}
   vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "*,1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
6222
 
6223
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6224
;;
6225
;; Parallel integral comparisons
6226
;;
6227
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6228
 
6229
;; Named expander for 256-bit integer equality comparison; only performs
;; operand fix-up before the RTL template is emitted.
(define_expand "avx2_eq<mode>3"
  [(set (match_operand:VI_256 0 "register_operand" "")
        (eq:VI_256
          (match_operand:VI_256 1 "nonimmediate_operand" "")
          (match_operand:VI_256 2 "nonimmediate_operand" "")))]
  "TARGET_AVX2"
  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
6236
 
6237
;; AVX2 256-bit integer equality comparison (vpcmpeq*).  Operand 1 is
;; marked commutative ("%").
(define_insn "*avx2_eq<mode>3"
  [(set (match_operand:VI_256 0 "register_operand" "=x")
        (eq:VI_256
          (match_operand:VI_256 1 "nonimmediate_operand" "%x")
          (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
  "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
6248
 
6249
;; SSE4.1 V2DI equality comparison (pcmpeqq / vpcmpeqq).
;; Alternative 0 is the two-operand SSE form, alternative 1 the VEX form.
(define_insn "*sse4_1_eqv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
        (eq:V2DI
          (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
          (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
  "@
   pcmpeqq\t{%2, %0|%0, %2}
   vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssecmp")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
6263
 
6264
;; SSE2 128-bit integer equality comparison for 8/16/32-bit elements.
;; Disabled when XOP is available (see the condition).
(define_insn "*sse2_eq<mode>3"
  [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
        (eq:VI124_128
          (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
          (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE2 && !TARGET_XOP
   && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
  "@
   pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
   vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
6279
 
6280
;; Named expander for the SSE2 128-bit integer equality comparison; only
;; performs operand fix-up.  Disabled when XOP is available.
(define_expand "sse2_eq<mode>3"
  [(set (match_operand:VI124_128 0 "register_operand" "")
        (eq:VI124_128
          (match_operand:VI124_128 1 "nonimmediate_operand" "")
          (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2 && !TARGET_XOP "
  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
6287
 
6288
;; Named expander for the SSE4.1 V2DI equality comparison; only performs
;; operand fix-up before the RTL template is emitted.
(define_expand "sse4_1_eqv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (eq:V2DI
          (match_operand:V2DI 1 "nonimmediate_operand" "")
          (match_operand:V2DI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE4_1"
  "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
6295
 
6296
;; SSE4.2 V2DI signed greater-than comparison (pcmpgtq / vpcmpgtq).
;; Not commutative: operand 1 must be a register.
(define_insn "sse4_2_gtv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
        (gt:V2DI
          (match_operand:V2DI 1 "register_operand" "0,x")
          (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE4_2"
  "@
   pcmpgtq\t{%2, %0|%0, %2}
   vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssecmp")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
6310
 
6311
;; AVX2 256-bit integer signed greater-than comparison (vpcmpgt*).
;; Not commutative: operand 1 must be a register.
(define_insn "avx2_gt<mode>3"
  [(set (match_operand:VI_256 0 "register_operand" "=x")
        (gt:VI_256
          (match_operand:VI_256 1 "register_operand" "x")
          (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX2"
  "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
6322
 
6323
;; SSE2 128-bit integer signed greater-than comparison for 8/16/32-bit
;; elements.  Disabled when XOP is available (see the condition).
(define_insn "sse2_gt<mode>3"
  [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
        (gt:VI124_128
          (match_operand:VI124_128 1 "register_operand" "0,x")
          (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE2 && !TARGET_XOP"
  "@
   pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
   vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
6337
 
6338
;; 256-bit vector conditional select driven by an integer comparison.
;; The data mode (V_256) and comparison mode (VI_256) may differ but must
;; have the same number of elements.  Expansion is done entirely by
;; ix86_expand_int_vcond.
(define_expand "vcond<V_256:mode><VI_256:mode>"
  [(set (match_operand:V_256 0 "register_operand" "")
        (if_then_else:V_256
          (match_operator 3 ""
            [(match_operand:VI_256 4 "nonimmediate_operand" "")
             (match_operand:VI_256 5 "general_operand" "")])
          (match_operand:V_256 1 "" "")
          (match_operand:V_256 2 "" "")))]
  "TARGET_AVX2
   && (GET_MODE_NUNITS (<V_256:MODE>mode)
       == GET_MODE_NUNITS (<VI_256:MODE>mode))"
{
  bool ok = ix86_expand_int_vcond (operands);
  gcc_assert (ok);
  DONE;
})
6354
 
6355
;; 128-bit vector conditional select driven by an 8/16/32-bit integer
;; comparison.  The data mode (V_128) and comparison mode (VI124_128) must
;; have the same number of elements.  Expansion is done entirely by
;; ix86_expand_int_vcond.
(define_expand "vcond<V_128:mode><VI124_128:mode>"
  [(set (match_operand:V_128 0 "register_operand" "")
        (if_then_else:V_128
          (match_operator 3 ""
            [(match_operand:VI124_128 4 "nonimmediate_operand" "")
             (match_operand:VI124_128 5 "general_operand" "")])
          (match_operand:V_128 1 "" "")
          (match_operand:V_128 2 "" "")))]
  "TARGET_SSE2
   && (GET_MODE_NUNITS (<V_128:MODE>mode)
       == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
{
  bool ok = ix86_expand_int_vcond (operands);
  gcc_assert (ok);
  DONE;
})
6371
 
6372
;; Vector conditional select on VI8F_128 data driven by a V2DI comparison.
;; Requires SSE4.2; expansion is done entirely by ix86_expand_int_vcond.
(define_expand "vcond<VI8F_128:mode>v2di"
  [(set (match_operand:VI8F_128 0 "register_operand" "")
        (if_then_else:VI8F_128
          (match_operator 3 ""
            [(match_operand:V2DI 4 "nonimmediate_operand" "")
             (match_operand:V2DI 5 "general_operand" "")])
          (match_operand:VI8F_128 1 "" "")
          (match_operand:VI8F_128 2 "" "")))]
  "TARGET_SSE4_2"
{
  bool ok = ix86_expand_int_vcond (operands);
  gcc_assert (ok);
  DONE;
})
6386
 
6387
;; Unsigned-comparison variant of the 256-bit vector conditional select.
;; The data mode (V_256) and comparison mode (VI_256) must have the same
;; number of elements.  Expansion is done by ix86_expand_int_vcond.
(define_expand "vcondu<V_256:mode><VI_256:mode>"
  [(set (match_operand:V_256 0 "register_operand" "")
        (if_then_else:V_256
          (match_operator 3 ""
            [(match_operand:VI_256 4 "nonimmediate_operand" "")
             (match_operand:VI_256 5 "nonimmediate_operand" "")])
          (match_operand:V_256 1 "general_operand" "")
          (match_operand:V_256 2 "general_operand" "")))]
  "TARGET_AVX2
   && (GET_MODE_NUNITS (<V_256:MODE>mode)
       == GET_MODE_NUNITS (<VI_256:MODE>mode))"
{
  bool ok = ix86_expand_int_vcond (operands);
  gcc_assert (ok);
  DONE;
})
6403
 
6404
;; Unsigned-comparison variant of the 128-bit vector conditional select.
;; The data mode (V_128) and comparison mode (VI124_128) must have the same
;; number of elements.  Expansion is done by ix86_expand_int_vcond.
(define_expand "vcondu<V_128:mode><VI124_128:mode>"
  [(set (match_operand:V_128 0 "register_operand" "")
        (if_then_else:V_128
          (match_operator 3 ""
            [(match_operand:VI124_128 4 "nonimmediate_operand" "")
             (match_operand:VI124_128 5 "nonimmediate_operand" "")])
          (match_operand:V_128 1 "general_operand" "")
          (match_operand:V_128 2 "general_operand" "")))]
  "TARGET_SSE2
   && (GET_MODE_NUNITS (<V_128:MODE>mode)
       == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
{
  bool ok = ix86_expand_int_vcond (operands);
  gcc_assert (ok);
  DONE;
})
6420
 
6421
;; Unsigned-comparison vector conditional select on VI8F_128 data driven by
;; a V2DI comparison.  Requires SSE4.2; expansion is done by
;; ix86_expand_int_vcond.
(define_expand "vcondu<VI8F_128:mode>v2di"
  [(set (match_operand:VI8F_128 0 "register_operand" "")
        (if_then_else:VI8F_128
          (match_operator 3 ""
            [(match_operand:V2DI 4 "nonimmediate_operand" "")
             (match_operand:V2DI 5 "nonimmediate_operand" "")])
          (match_operand:VI8F_128 1 "general_operand" "")
          (match_operand:VI8F_128 2 "general_operand" "")))]
  "TARGET_SSE4_2"
{
  bool ok = ix86_expand_int_vcond (operands);
  gcc_assert (ok);
  DONE;
})
6435
 
6436
;; Modes handled by the variable vec_perm expander: all 128-bit vector
;; modes unconditionally, and the 256-bit modes only with AVX2.
(define_mode_iterator VEC_PERM_AVX2
  [V16QI V8HI V4SI V2DI V4SF V2DF
   (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
   (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
   (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")])
6441
 
6442
;; Variable vector permutation: operands 1 and 2 are the inputs, operand 3
;; is the selector in the corresponding integer vector mode.  All work is
;; delegated to ix86_expand_vec_perm.
(define_expand "vec_perm<mode>"
  [(match_operand:VEC_PERM_AVX2 0 "register_operand" "")
   (match_operand:VEC_PERM_AVX2 1 "register_operand" "")
   (match_operand:VEC_PERM_AVX2 2 "register_operand" "")
   (match_operand:<sseintvecmode> 3 "register_operand" "")]
  "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
{
  ix86_expand_vec_perm (operands);
  DONE;
})
6452
 
6453
;; Modes handled by the constant vec_perm expander, each gated on the ISA
;; level named in its condition string.
(define_mode_iterator VEC_PERM_CONST
  [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
   (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
   (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
   (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
   (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
   (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")])
6460
 
6461
;; Constant vector permutation.  Operand 3 is the selector (no predicate).
;; The expander FAILs when ix86_expand_vec_perm_const cannot handle the
;; request.
(define_expand "vec_perm_const<mode>"
  [(match_operand:VEC_PERM_CONST 0 "register_operand" "")
   (match_operand:VEC_PERM_CONST 1 "register_operand" "")
   (match_operand:VEC_PERM_CONST 2 "register_operand" "")
   (match_operand:<sseintvecmode> 3 "" "")]
  ""
{
  if (ix86_expand_vec_perm_const (operands))
    DONE;
  else
    FAIL;
})
6473
 
6474
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6475
;;
6476
;; Parallel bitwise logical operations
6477
;;
6478
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6479
 
6480
;; Vector one's complement, implemented as XOR with an all-ones constant
;; vector that is built here and forced into a register (operand 2).
(define_expand "one_cmpl<mode>2"
  [(set (match_operand:VI 0 "register_operand" "")
        (xor:VI (match_operand:VI 1 "nonimmediate_operand" "")
                (match_dup 2)))]
  "TARGET_SSE"
{
  int i, n = GET_MODE_NUNITS (<MODE>mode);
  rtvec v = rtvec_alloc (n);

  /* Every element of the mask is -1.  */
  for (i = 0; i < n; ++i)
    RTVEC_ELT (v, i) = constm1_rtx;

  operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
})
6494
 
6495
;; Named expander for vector AND-NOT: operand 0 = ~operand 1 & operand 2.
;; No preparation code; the RTL template is emitted as written.
(define_expand "<sse2_avx2>_andnot<mode>3"
  [(set (match_operand:VI_AVX2 0 "register_operand" "")
        (and:VI_AVX2
          (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand" ""))
          (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2")
6501
 
6502
;; Vector AND-NOT instruction: operand 0 = ~operand 1 & operand 2.
;; The mnemonic is chosen at output time from the insn's "mode" attribute:
;; integer modes (OI/TI) use pandn, float modes (V8SF/V4SF) use andnps.
;; The "mode" attribute itself falls back to the float forms when the
;; integer instruction is not available (no AVX2 for 256-bit, no SSE2).
(define_insn "*andnot<mode>3"
  [(set (match_operand:VI 0 "register_operand" "=x,x")
        (and:VI
          (not:VI (match_operand:VI 1 "register_operand" "0,x"))
          (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE"
{
  /* Sized for the longest template built below:
     "vandnps\t{%2, %1, %0|%0, %1, %2}" is 31 characters plus the NUL.  */
  static char buf[32];
  const char *ops;
  const char *tmp;

  switch (get_attr_mode (insn))
    {
    case MODE_OI:
      gcc_assert (TARGET_AVX2);
      /* FALLTHRU */
    case MODE_TI:
      gcc_assert (TARGET_SSE2);

      tmp = "pandn";
      break;

    case MODE_V8SF:
      gcc_assert (TARGET_AVX);
      /* FALLTHRU */
    case MODE_V4SF:
      gcc_assert (TARGET_SSE);

      tmp = "andnps";
      break;

    default:
      gcc_unreachable ();
    }

  /* Alternative 0 is the two-operand SSE form, 1 the VEX form.  */
  switch (which_alternative)
    {
    case 0:
      ops = "%s\t{%%2, %%0|%%0, %%2}";
      break;
    case 1:
      ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
      break;
    default:
      gcc_unreachable ();
    }

  snprintf (buf, sizeof (buf), ops, tmp);
  return buf;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   ;; The 0x66 prefix applies only to the non-VEX integer form.
   (set (attr "prefix_data16")
     (if_then_else
       (and (eq_attr "alternative" "0")
            (eq_attr "mode" "TI"))
       (const_string "1")
       (const_string "*")))
   (set_attr "prefix" "orig,vex")
   (set (attr "mode")
     (cond [(and (not (match_test "TARGET_AVX2"))
                 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
              (const_string "V8SF")
            (not (match_test "TARGET_SSE2"))
              (const_string "V4SF")
           ]
           (const_string "<sseinsnmode>")))])
6567
 
6568
;; Vector bitwise logic operations (any_logic) on all integer vector modes.
;; Only operand fix-up is needed; the RTL template is emitted as written.
(define_expand "<code><mode>3"
  [(set (match_operand:VI 0 "register_operand" "")
        (any_logic:VI
          (match_operand:VI 1 "nonimmediate_operand" "")
          (match_operand:VI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
6575
 
6576
;; Vector bitwise logic instruction.  The mnemonic is chosen at output time
;; from the insn's "mode" attribute: integer modes (OI/TI) use the "p"
;; prefixed form, float modes (V8SF/V4SF) the "ps" suffixed form.  The
;; "mode" attribute falls back to the float forms when the integer
;; instruction is not available (no AVX2 for 256-bit, no SSE2).
(define_insn "*<code><mode>3"
  [(set (match_operand:VI 0 "register_operand" "=x,x")
        (any_logic:VI
          (match_operand:VI 1 "nonimmediate_operand" "%0,x")
          (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
{
  static char buf[32];
  const char *ops;
  const char *tmp;

  switch (get_attr_mode (insn))
    {
    case MODE_OI:
      gcc_assert (TARGET_AVX2);
      /* FALLTHRU */
    case MODE_TI:
      gcc_assert (TARGET_SSE2);

      tmp = "p<logic>";
      break;

    case MODE_V8SF:
      gcc_assert (TARGET_AVX);
      /* FALLTHRU */
    case MODE_V4SF:
      gcc_assert (TARGET_SSE);

      tmp = "<logic>ps";
      break;

    default:
      gcc_unreachable ();
    }

  /* Alternative 0 is the two-operand SSE form, 1 the VEX form.  */
  switch (which_alternative)
    {
    case 0:
      ops = "%s\t{%%2, %%0|%%0, %%2}";
      break;
    case 1:
      ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
      break;
    default:
      gcc_unreachable ();
    }

  snprintf (buf, sizeof (buf), ops, tmp);
  return buf;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   ;; The 0x66 prefix applies only to the non-VEX integer form.
   (set (attr "prefix_data16")
     (if_then_else
       (and (eq_attr "alternative" "0")
            (eq_attr "mode" "TI"))
       (const_string "1")
       (const_string "*")))
   (set_attr "prefix" "orig,vex")
   (set (attr "mode")
     (cond [(and (not (match_test "TARGET_AVX2"))
                 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
              (const_string "V8SF")
            (not (match_test "TARGET_SSE2"))
              (const_string "V4SF")
           ]
           (const_string "<sseinsnmode>")))])
6642
 
6643
;; AND-NOT on a TFmode value held in an SSE register (pandn / vpandn):
;; operand 0 = ~operand 1 & operand 2.
(define_insn "*andnottf3"
  [(set (match_operand:TF 0 "register_operand" "=x,x")
        (and:TF
          (not:TF (match_operand:TF 1 "register_operand" "0,x"))
          (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE2"
  "@
   pandn\t{%2, %0|%0, %2}
   vpandn\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
6657
 
6658
;; Bitwise logic operations (any_logic) on TFmode values.  Only operand
;; fix-up is needed; the RTL template is emitted as written.
(define_expand "<code>tf3"
  [(set (match_operand:TF 0 "register_operand" "")
        (any_logic:TF
          (match_operand:TF 1 "nonimmediate_operand" "")
          (match_operand:TF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
6665
 
6666
;; Bitwise logic instruction on TFmode values held in SSE registers.
;; Alternative 0 is the two-operand SSE form, alternative 1 the VEX form.
(define_insn "*<code>tf3"
  [(set (match_operand:TF 0 "register_operand" "=x,x")
        (any_logic:TF
          (match_operand:TF 1 "nonimmediate_operand" "%0,x")
          (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE2
   && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
  "@
   p<logic>\t{%2, %0|%0, %2}
   vp<logic>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
6681
 
6682
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6683
;;
6684
;; Parallel integral element swizzling
6685
;;
6686
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6687
 
6688
;; Truncating pack of two wide-element vectors into one narrow-element
;; vector.  Both inputs are re-viewed in the narrow (pack) mode and
;; ix86_expand_vec_extract_even_odd is called with a final argument of 0.
(define_expand "vec_pack_trunc_<mode>"
  [(match_operand:<ssepackmode> 0 "register_operand" "")
   (match_operand:VI248_AVX2 1 "register_operand" "")
   (match_operand:VI248_AVX2 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
  rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
  ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
  DONE;
})
6699
 
6700
;; packsswb / vpacksswb: signed-saturating truncation of two word vectors,
;; concatenated into one byte vector.
(define_insn "<sse2_avx2>_packsswb"
  [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
        (vec_concat:VI1_AVX2
          (ss_truncate:<ssehalfvecmode>
            (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
          (ss_truncate:<ssehalfvecmode>
            (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
  "TARGET_SSE2"
  "@
   packsswb\t{%2, %0|%0, %2}
   vpacksswb\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
6716
 
6717
;; packssdw / vpackssdw: signed-saturating truncation of two doubleword
;; vectors, concatenated into one word vector.
(define_insn "<sse2_avx2>_packssdw"
  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
        (vec_concat:VI2_AVX2
          (ss_truncate:<ssehalfvecmode>
            (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
          (ss_truncate:<ssehalfvecmode>
            (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
  "TARGET_SSE2"
  "@
   packssdw\t{%2, %0|%0, %2}
   vpackssdw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
6733
 
6734
;; PACKUSWB: concatenate the unsigned-saturating truncations of operands 1
;; and 2 into one byte vector.  Alternative 0 is the legacy SSE form
;; (operand 1 tied to the destination); alternative 1 is the
;; three-operand VEX form.
(define_insn "<sse2_avx2>_packuswb"
  [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
        (vec_concat:VI1_AVX2
          (us_truncate:<ssehalfvecmode>
            (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
          (us_truncate:<ssehalfvecmode>
            (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
  "TARGET_SSE2"
  "@
   packuswb\t{%2, %0|%0, %2}
   vpackuswb\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
6750
 
6751
;; VPUNPCKHBW on 256-bit vectors.  The selection interleaves bytes 8-15 of
;; operand 1 with bytes 8-15 of operand 2 (indices 40-47 of the
;; concatenation), then does the same for bytes 24-31, i.e. the high half
;; of each 128-bit lane is interleaved independently.
(define_insn "avx2_interleave_highv32qi"
  [(set (match_operand:V32QI 0 "register_operand" "=x")
        (vec_select:V32QI
          (vec_concat:V64QI
            (match_operand:V32QI 1 "register_operand" "x")
            (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 8)  (const_int 40)
                     (const_int 9)  (const_int 41)
                     (const_int 10) (const_int 42)
                     (const_int 11) (const_int 43)
                     (const_int 12) (const_int 44)
                     (const_int 13) (const_int 45)
                     (const_int 14) (const_int 46)
                     (const_int 15) (const_int 47)
                     (const_int 24) (const_int 56)
                     (const_int 25) (const_int 57)
                     (const_int 26) (const_int 58)
                     (const_int 27) (const_int 59)
                     (const_int 28) (const_int 60)
                     (const_int 29) (const_int 61)
                     (const_int 30) (const_int 62)
                     (const_int 31) (const_int 63)])))]
  "TARGET_AVX2"
  "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
6778
 
6779
;; PUNPCKHBW: interleave the high eight bytes of operand 1 (indices 8-15)
;; with the high eight bytes of operand 2 (indices 24-31 of the
;; concatenation).  Alternative 0 is the legacy SSE form, alternative 1
;; the three-operand VEX form.
(define_insn "vec_interleave_highv16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
        (vec_select:V16QI
          (vec_concat:V32QI
            (match_operand:V16QI 1 "register_operand" "0,x")
            (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
          (parallel [(const_int 8)  (const_int 24)
                     (const_int 9)  (const_int 25)
                     (const_int 10) (const_int 26)
                     (const_int 11) (const_int 27)
                     (const_int 12) (const_int 28)
                     (const_int 13) (const_int 29)
                     (const_int 14) (const_int 30)
                     (const_int 15) (const_int 31)])))]
  "TARGET_SSE2"
  "@
   punpckhbw\t{%2, %0|%0, %2}
   vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
6802
 
6803
;; VPUNPCKLBW on 256-bit vectors.  The selection interleaves bytes 0-7 of
;; operand 1 with bytes 0-7 of operand 2 (indices 32-39 of the
;; concatenation), then does the same for bytes 16-23, i.e. the low half
;; of each 128-bit lane is interleaved independently.
(define_insn "avx2_interleave_lowv32qi"
  [(set (match_operand:V32QI 0 "register_operand" "=x")
        (vec_select:V32QI
          (vec_concat:V64QI
            (match_operand:V32QI 1 "register_operand" "x")
            (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 32)
                     (const_int 1) (const_int 33)
                     (const_int 2) (const_int 34)
                     (const_int 3) (const_int 35)
                     (const_int 4) (const_int 36)
                     (const_int 5) (const_int 37)
                     (const_int 6) (const_int 38)
                     (const_int 7) (const_int 39)
                     (const_int 16) (const_int 48)
                     (const_int 17) (const_int 49)
                     (const_int 18) (const_int 50)
                     (const_int 19) (const_int 51)
                     (const_int 20) (const_int 52)
                     (const_int 21) (const_int 53)
                     (const_int 22) (const_int 54)
                     (const_int 23) (const_int 55)])))]
  "TARGET_AVX2"
  "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
6830
 
6831
;; PUNPCKLBW: interleave the low eight bytes of operand 1 (indices 0-7)
;; with the low eight bytes of operand 2 (indices 16-23 of the
;; concatenation).  Alternative 0 is the legacy SSE form, alternative 1
;; the three-operand VEX form.
(define_insn "vec_interleave_lowv16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
        (vec_select:V16QI
          (vec_concat:V32QI
            (match_operand:V16QI 1 "register_operand" "0,x")
            (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
          (parallel [(const_int 0) (const_int 16)
                     (const_int 1) (const_int 17)
                     (const_int 2) (const_int 18)
                     (const_int 3) (const_int 19)
                     (const_int 4) (const_int 20)
                     (const_int 5) (const_int 21)
                     (const_int 6) (const_int 22)
                     (const_int 7) (const_int 23)])))]
  "TARGET_SSE2"
  "@
   punpcklbw\t{%2, %0|%0, %2}
   vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
6854
 
6855
;; VPUNPCKHWD on 256-bit vectors.  Words 4-7 and 12-15 of operand 1 are
;; interleaved with the same-numbered words of operand 2 (indices 20-23
;; and 28-31 of the concatenation): the high half of each 128-bit lane.
(define_insn "avx2_interleave_highv16hi"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_select:V16HI
          (vec_concat:V32HI
            (match_operand:V16HI 1 "register_operand" "x")
            (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 4) (const_int 20)
                     (const_int 5) (const_int 21)
                     (const_int 6) (const_int 22)
                     (const_int 7) (const_int 23)
                     (const_int 12) (const_int 28)
                     (const_int 13) (const_int 29)
                     (const_int 14) (const_int 30)
                     (const_int 15) (const_int 31)])))]
  "TARGET_AVX2"
  "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
6874
 
6875
;; PUNPCKHWD: interleave words 4-7 of operand 1 with words 4-7 of
;; operand 2 (indices 12-15 of the concatenation).  Alternative 0 is the
;; legacy SSE form, alternative 1 the three-operand VEX form.
(define_insn "vec_interleave_highv8hi"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_select:V8HI
          (vec_concat:V16HI
            (match_operand:V8HI 1 "register_operand" "0,x")
            (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
          (parallel [(const_int 4) (const_int 12)
                     (const_int 5) (const_int 13)
                     (const_int 6) (const_int 14)
                     (const_int 7) (const_int 15)])))]
  "TARGET_SSE2"
  "@
   punpckhwd\t{%2, %0|%0, %2}
   vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
6894
 
6895
;; VPUNPCKLWD on 256-bit vectors.  Words 0-3 and 8-11 of operand 1 are
;; interleaved with the same-numbered words of operand 2 (indices 16-19
;; and 24-27 of the concatenation): the low half of each 128-bit lane.
(define_insn "avx2_interleave_lowv16hi"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_select:V16HI
          (vec_concat:V32HI
            (match_operand:V16HI 1 "register_operand" "x")
            (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 16)
                     (const_int 1) (const_int 17)
                     (const_int 2) (const_int 18)
                     (const_int 3) (const_int 19)
                     (const_int 8) (const_int 24)
                     (const_int 9) (const_int 25)
                     (const_int 10) (const_int 26)
                     (const_int 11) (const_int 27)])))]
  "TARGET_AVX2"
  "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
6914
 
6915
;; PUNPCKLWD: interleave words 0-3 of operand 1 with words 0-3 of
;; operand 2 (indices 8-11 of the concatenation).  Alternative 0 is the
;; legacy SSE form, alternative 1 the three-operand VEX form.
(define_insn "vec_interleave_lowv8hi"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_select:V8HI
          (vec_concat:V16HI
            (match_operand:V8HI 1 "register_operand" "0,x")
            (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
          (parallel [(const_int 0) (const_int 8)
                     (const_int 1) (const_int 9)
                     (const_int 2) (const_int 10)
                     (const_int 3) (const_int 11)])))]
  "TARGET_SSE2"
  "@
   punpcklwd\t{%2, %0|%0, %2}
   vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
6934
 
6935
;; VPUNPCKHDQ on 256-bit vectors.  Dwords 2-3 and 6-7 of operand 1 are
;; interleaved with the same-numbered dwords of operand 2 (indices 10-11
;; and 14-15 of the concatenation): the high half of each 128-bit lane.
(define_insn "avx2_interleave_highv8si"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (vec_select:V8SI
          (vec_concat:V16SI
            (match_operand:V8SI 1 "register_operand" "x")
            (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 2) (const_int 10)
                     (const_int 3) (const_int 11)
                     (const_int 6) (const_int 14)
                     (const_int 7) (const_int 15)])))]
  "TARGET_AVX2"
  "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
6950
 
6951
;; PUNPCKHDQ: interleave dwords 2-3 of operand 1 with dwords 2-3 of
;; operand 2 (indices 6-7 of the concatenation).  Alternative 0 is the
;; legacy SSE form, alternative 1 the three-operand VEX form.
(define_insn "vec_interleave_highv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
        (vec_select:V4SI
          (vec_concat:V8SI
            (match_operand:V4SI 1 "register_operand" "0,x")
            (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
          (parallel [(const_int 2) (const_int 6)
                     (const_int 3) (const_int 7)])))]
  "TARGET_SSE2"
  "@
   punpckhdq\t{%2, %0|%0, %2}
   vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
6968
 
6969
;; VPUNPCKLDQ on 256-bit vectors.  Dwords 0-1 and 4-5 of operand 1 are
;; interleaved with the same-numbered dwords of operand 2 (indices 8-9
;; and 12-13 of the concatenation): the low half of each 128-bit lane.
(define_insn "avx2_interleave_lowv8si"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (vec_select:V8SI
          (vec_concat:V16SI
            (match_operand:V8SI 1 "register_operand" "x")
            (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 8)
                     (const_int 1) (const_int 9)
                     (const_int 4) (const_int 12)
                     (const_int 5) (const_int 13)])))]
  "TARGET_AVX2"
  "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
6984
 
6985
;; PUNPCKLDQ: interleave dwords 0-1 of operand 1 with dwords 0-1 of
;; operand 2 (indices 4-5 of the concatenation).  Alternative 0 is the
;; legacy SSE form, alternative 1 the three-operand VEX form.
(define_insn "vec_interleave_lowv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
        (vec_select:V4SI
          (vec_concat:V8SI
            (match_operand:V4SI 1 "register_operand" "0,x")
            (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 1) (const_int 5)])))]
  "TARGET_SSE2"
  "@
   punpckldq\t{%2, %0|%0, %2}
   vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
7002
 
7003
;; Full-width "interleave high" for 256-bit integer vectors.  The AVX2
;; unpack insns only interleave within each 128-bit lane, so both the
;; per-lane low and per-lane high interleaves are computed and then the
;; upper 128-bit lane of each is combined with a lane permute.
(define_expand "vec_interleave_high<mode>"
  [(match_operand:VI_256 0 "register_operand" "=x")
   (match_operand:VI_256 1 "register_operand" "x")
   (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
 "TARGET_AVX2"
{
  rtx t1 = gen_reg_rtx (<MODE>mode);
  rtx t2 = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
  emit_insn (gen_avx2_interleave_high<mode> (t2,  operands[1], operands[2]));
  /* Immediate 1 + (3 << 4): per the VPERM2I128 encoding, result lane 0
     comes from lane 1 of t1 and result lane 1 from lane 1 of t2.  */
  emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
                                gen_lowpart (V4DImode, t1),
                                gen_lowpart (V4DImode, t2), GEN_INT (1 + (3 << 4))));
  DONE;
})
7018
 
7019
;; Full-width "interleave low" for 256-bit integer vectors.  The AVX2
;; unpack insns only interleave within each 128-bit lane, so both the
;; per-lane low and per-lane high interleaves are computed and then the
;; lower 128-bit lane of each is combined with a lane permute.
(define_expand "vec_interleave_low<mode>"
  [(match_operand:VI_256 0 "register_operand" "=x")
   (match_operand:VI_256 1 "register_operand" "x")
   (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
 "TARGET_AVX2"
{
  rtx t1 = gen_reg_rtx (<MODE>mode);
  rtx t2 = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
  emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
  /* Immediate 0 + (2 << 4): per the VPERM2I128 encoding, result lane 0
     comes from lane 0 of t1 and result lane 1 from lane 0 of t2.  */
  emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
                                gen_lowpart (V4DImode, t1),
                                gen_lowpart (V4DImode, t2), GEN_INT (0 + (2 << 4))));
  DONE;
})
7034
 
7035
;; Modes handled by pinsr patterns.  V8HI is unconditional; the byte and
;; dword element forms require SSE4.1, and the qword form additionally
;; requires a 64-bit target.
(define_mode_iterator PINSR_MODE
  [(V16QI "TARGET_SSE4_1") V8HI
   (V4SI "TARGET_SSE4_1")
   (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
7040
 
7041
;; Pattern-name prefix for each pinsr mode: "sse2" for V8HI and "sse4_1"
;; for the other element sizes.
(define_mode_attr sse2p4_1
  [(V16QI "sse4_1") (V8HI "sse2")
   (V4SI "sse4_1") (V2DI "sse4_1")])
7044
 
7045
;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;;
;; PINSR{B,W,D,Q}: insert scalar operand 2 into one element of vector
;; operand 1.  Operand 3 is the vec_merge mask and must have exactly one
;; bit set, below the number of vector elements; the output code turns it
;; into the element index expected by the instruction.  Alternatives 0/1
;; are the legacy SSE register/memory forms, 2/3 the VEX forms.
(define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
  [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
        (vec_merge:PINSR_MODE
          (vec_duplicate:PINSR_MODE
            (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
          (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
          (match_operand:SI 3 "const_int_operand" "")))]
  "TARGET_SSE2
   && ((unsigned) exact_log2 (INTVAL (operands[3]))
       < GET_MODE_NUNITS (<MODE>mode))"
{
  /* Convert the one-hot merge mask into an element index.  */
  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));

  switch (which_alternative)
    {
    case 0:
      /* A register source narrower than SImode is printed with the %k
         (32-bit register name) modifier.  */
      if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
        return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
      /* FALLTHRU */
    case 1:
      return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
    case 2:
      if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
        return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
      /* FALLTHRU */
    case 3:
      return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,noavx,avx,avx")
   (set_attr "type" "sselog")
   (set (attr "prefix_rex")
     (if_then_else
       (and (not (match_test "TARGET_AVX"))
            (eq (const_string "<MODE>mode") (const_string "V2DImode")))
       (const_string "1")
       (const_string "*")))
   (set (attr "prefix_data16")
     (if_then_else
       (and (not (match_test "TARGET_AVX"))
            (eq (const_string "<MODE>mode") (const_string "V8HImode")))
       (const_string "1")
       (const_string "*")))
   (set (attr "prefix_extra")
     (if_then_else
       (and (not (match_test "TARGET_AVX"))
            (eq (const_string "<MODE>mode") (const_string "V8HImode")))
       (const_string "*")
       (const_string "1")))
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,orig,vex,vex")
   (set_attr "mode" "TI")])
7100
 
7101
;; PEXTRB to a general register: extract byte element operand 2 (0-15) of
;; operand 1 and zero-extend it into an SImode or DImode register.  The
;; destination is always printed as a 32-bit register (%k0).
(define_insn "*sse4_1_pextrb_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (zero_extend:SWI48
          (vec_select:QI
            (match_operand:V16QI 1 "register_operand" "x")
            (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
  "TARGET_SSE4_1"
  "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
7114
 
7115
;; PEXTRB to memory: store byte element operand 2 (0-15) of operand 1
;; directly into a QImode memory location.
(define_insn "*sse4_1_pextrb_memory"
  [(set (match_operand:QI 0 "memory_operand" "=m")
        (vec_select:QI
          (match_operand:V16QI 1 "register_operand" "x")
          (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
  "TARGET_SSE4_1"
  "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
7127
 
7128
;; PEXTRW to a general register: extract word element operand 2 (0-7) of
;; operand 1 and zero-extend it into an SImode or DImode register.  The
;; destination is always printed as a 32-bit register (%k0).
(define_insn "*sse2_pextrw_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (zero_extend:SWI48
          (vec_select:HI
            (match_operand:V8HI 1 "register_operand" "x")
            (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
  "TARGET_SSE2"
  "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
7141
 
7142
;; PEXTRW to memory (SSE4.1 only): store word element operand 2 (0-7) of
;; operand 1 directly into an HImode memory location.
(define_insn "*sse4_1_pextrw_memory"
  [(set (match_operand:HI 0 "memory_operand" "=m")
        (vec_select:HI
          (match_operand:V8HI 1 "register_operand" "x")
          (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
  "TARGET_SSE4_1"
  "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
7154
 
7155
;; PEXTRD: extract dword element operand 2 (0-3) of operand 1 into an
;; SImode register or memory location.
(define_insn "*sse4_1_pextrd"
  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
        (vec_select:SI
          (match_operand:V4SI 1 "register_operand" "x")
          (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
  "TARGET_SSE4_1"
  "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
7167
 
7168
;; PEXTRD with zero extension to DImode (64-bit targets only): extract
;; dword element operand 2 (0-3) of operand 1.  The destination is
;; printed as a 32-bit register (%k0).
(define_insn "*sse4_1_pextrd_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (vec_select:SI
            (match_operand:V4SI 1 "register_operand" "x")
            (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
  "TARGET_64BIT && TARGET_SSE4_1"
  "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
7181
 
7182
;; It must come before *vec_extractv2di_1_rex64 since it is preferred.
;;
;; PEXTRQ (64-bit targets only): extract qword element operand 2 (0-1) of
;; operand 1 into a DImode register or memory location.
(define_insn "*sse4_1_pextrq"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
        (vec_select:DI
          (match_operand:V2DI 1 "register_operand" "x")
          (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
  "TARGET_SSE4_1 && TARGET_64BIT"
  "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_rex" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
7196
 
7197
;; Expander for 256-bit VPSHUFD taking the raw 8-bit immediate (operand 2).
;; The immediate is split into four 2-bit selectors; each is passed once
;; as-is for the first four elements and once offset by 4 for the last
;; four, matching the operand layout of avx2_pshufd_1.
(define_expand "avx2_pshufdv3"
  [(match_operand:V8SI 0 "register_operand" "")
   (match_operand:V8SI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_0_to_255_operand" "")]
  "TARGET_AVX2"
{
  int mask = INTVAL (operands[2]);
  emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
                                GEN_INT ((mask >> 0) & 3),
                                GEN_INT ((mask >> 2) & 3),
                                GEN_INT ((mask >> 4) & 3),
                                GEN_INT ((mask >> 6) & 3),
                                GEN_INT (((mask >> 0) & 3) + 4),
                                GEN_INT (((mask >> 2) & 3) + 4),
                                GEN_INT (((mask >> 4) & 3) + 4),
                                GEN_INT (((mask >> 6) & 3) + 4)));
  DONE;
})
7215
 
7216
;; 256-bit VPSHUFD expressed as an explicit element selection.  Operands
;; 2-5 select (from 0-3) the first four result elements and operands 6-9
;; select (from 4-7) the last four.  The insn condition requires each of
;; operands 6-9 to equal the corresponding operand 2-5 plus 4, because a
;; single immediate is emitted for the whole instruction.
(define_insn "avx2_pshufd_1"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (vec_select:V8SI
          (match_operand:V8SI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")
                     (match_operand 6 "const_4_to_7_operand" "")
                     (match_operand 7 "const_4_to_7_operand" "")
                     (match_operand 8 "const_4_to_7_operand" "")
                     (match_operand 9 "const_4_to_7_operand" "")])))]
  "TARGET_AVX2
   && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
   && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
{
  /* Pack the four 2-bit selectors back into the 8-bit immediate and
     reuse operand 2 to print it.  */
  int mask = 0;
  mask |= INTVAL (operands[2]) << 0;
  mask |= INTVAL (operands[3]) << 2;
  mask |= INTVAL (operands[4]) << 4;
  mask |= INTVAL (operands[5]) << 6;
  operands[2] = GEN_INT (mask);

  return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "OI")])
7247
 
7248
;; Expander for 128-bit PSHUFD taking the raw immediate (operand 2).  The
;; low eight bits are split into four 2-bit element selectors, which are
;; handed to sse2_pshufd_1.
(define_expand "sse2_pshufd"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V4SI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int mask = INTVAL (operands[2]);
  emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
                                GEN_INT ((mask >> 0) & 3),
                                GEN_INT ((mask >> 2) & 3),
                                GEN_INT ((mask >> 4) & 3),
                                GEN_INT ((mask >> 6) & 3)));
  DONE;
})
7262
 
7263
;; 128-bit PSHUFD expressed as an explicit element selection: operands
;; 2-5 (each 0-3) name the source element for result elements 0-3.
(define_insn "sse2_pshufd_1"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")])))]
  "TARGET_SSE2"
{
  /* Pack the four 2-bit selectors back into the 8-bit immediate and
     reuse operand 2 to print it.  */
  int mask = 0;
  mask |= INTVAL (operands[2]) << 0;
  mask |= INTVAL (operands[3]) << 2;
  mask |= INTVAL (operands[4]) << 4;
  mask |= INTVAL (operands[5]) << 6;
  operands[2] = GEN_INT (mask);

  return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
7287
 
7288
;; Expander for 256-bit VPSHUFLW taking the raw 8-bit immediate
;; (operand 2).  The immediate is split into four 2-bit selectors; each is
;; passed once as-is (words 0-3) and once offset by 8 (words 8-11),
;; matching the operand layout of avx2_pshuflw_1.
(define_expand "avx2_pshuflwv3"
  [(match_operand:V16HI 0 "register_operand" "")
   (match_operand:V16HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_0_to_255_operand" "")]
  "TARGET_AVX2"
{
  int mask = INTVAL (operands[2]);
  emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
                                 GEN_INT ((mask >> 0) & 3),
                                 GEN_INT ((mask >> 2) & 3),
                                 GEN_INT ((mask >> 4) & 3),
                                 GEN_INT ((mask >> 6) & 3),
                                 GEN_INT (((mask >> 0) & 3) + 8),
                                 GEN_INT (((mask >> 2) & 3) + 8),
                                 GEN_INT (((mask >> 4) & 3) + 8),
                                 GEN_INT (((mask >> 6) & 3) + 8)));
  DONE;
})
7306
 
7307
;; 256-bit VPSHUFLW expressed as an explicit element selection.  Words 0-3
;; and 8-11 of the result are chosen by operands 2-5 (0-3) and 6-9 (8-11);
;; words 4-7 and 12-15 are copied unchanged.  The insn condition requires
;; each of operands 6-9 to equal the corresponding operand 2-5 plus 8,
;; because a single immediate is emitted for the whole instruction.
(define_insn "avx2_pshuflw_1"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_select:V16HI
          (match_operand:V16HI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")
                     (const_int 4)
                     (const_int 5)
                     (const_int 6)
                     (const_int 7)
                     (match_operand 6 "const_8_to_11_operand" "")
                     (match_operand 7 "const_8_to_11_operand" "")
                     (match_operand 8 "const_8_to_11_operand" "")
                     (match_operand 9 "const_8_to_11_operand" "")
                     (const_int 12)
                     (const_int 13)
                     (const_int 14)
                     (const_int 15)])))]
  "TARGET_AVX2
   && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
   && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
{
  /* Pack the four 2-bit selectors back into the 8-bit immediate and
     reuse operand 2 to print it.  */
  int mask = 0;
  mask |= INTVAL (operands[2]) << 0;
  mask |= INTVAL (operands[3]) << 2;
  mask |= INTVAL (operands[4]) << 4;
  mask |= INTVAL (operands[5]) << 6;
  operands[2] = GEN_INT (mask);

  return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "OI")])
7346
 
7347
;; Expander for 128-bit PSHUFLW taking the raw immediate (operand 2).  The
;; low eight bits are split into four 2-bit selectors for words 0-3,
;; which are handed to sse2_pshuflw_1.
(define_expand "sse2_pshuflw"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V8HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int mask = INTVAL (operands[2]);
  emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
                                 GEN_INT ((mask >> 0) & 3),
                                 GEN_INT ((mask >> 2) & 3),
                                 GEN_INT ((mask >> 4) & 3),
                                 GEN_INT ((mask >> 6) & 3)));
  DONE;
})
7361
 
7362
;; 128-bit PSHUFLW expressed as an explicit element selection: operands
;; 2-5 (each 0-3) choose result words 0-3, while words 4-7 are copied
;; unchanged.
(define_insn "sse2_pshuflw_1"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")
                     (const_int 4)
                     (const_int 5)
                     (const_int 6)
                     (const_int 7)])))]
  "TARGET_SSE2"
{
  /* Pack the four 2-bit selectors back into the 8-bit immediate and
     reuse operand 2 to print it.  */
  int mask = 0;
  mask |= INTVAL (operands[2]) << 0;
  mask |= INTVAL (operands[3]) << 2;
  mask |= INTVAL (operands[4]) << 4;
  mask |= INTVAL (operands[5]) << 6;
  operands[2] = GEN_INT (mask);

  return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
7391
 
7392
;; Expander for 256-bit VPSHUFHW taking the raw 8-bit immediate
;; (operand 2).  The immediate is split into four 2-bit selectors; each is
;; passed once offset by 4 (words 4-7) and once offset by 12 (words
;; 12-15), matching the operand layout of avx2_pshufhw_1.
(define_expand "avx2_pshufhwv3"
  [(match_operand:V16HI 0 "register_operand" "")
   (match_operand:V16HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_0_to_255_operand" "")]
  "TARGET_AVX2"
{
  int mask = INTVAL (operands[2]);
  emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
                                 GEN_INT (((mask >> 0) & 3) + 4),
                                 GEN_INT (((mask >> 2) & 3) + 4),
                                 GEN_INT (((mask >> 4) & 3) + 4),
                                 GEN_INT (((mask >> 6) & 3) + 4),
                                 GEN_INT (((mask >> 0) & 3) + 12),
                                 GEN_INT (((mask >> 2) & 3) + 12),
                                 GEN_INT (((mask >> 4) & 3) + 12),
                                 GEN_INT (((mask >> 6) & 3) + 12)));
  DONE;
})
7410
 
7411
;; 256-bit VPSHUFHW expressed as an explicit element selection.  Words 4-7
;; and 12-15 of the result are chosen by operands 2-5 (4-7) and 6-9
;; (12-15); words 0-3 and 8-11 are copied unchanged.  The insn condition
;; requires each of operands 6-9 to equal the corresponding operand 2-5
;; plus 8, because a single immediate is emitted for the whole
;; instruction.
(define_insn "avx2_pshufhw_1"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_select:V16HI
          (match_operand:V16HI 1 "nonimmediate_operand" "xm")
          (parallel [(const_int 0)
                     (const_int 1)
                     (const_int 2)
                     (const_int 3)
                     (match_operand 2 "const_4_to_7_operand" "")
                     (match_operand 3 "const_4_to_7_operand" "")
                     (match_operand 4 "const_4_to_7_operand" "")
                     (match_operand 5 "const_4_to_7_operand" "")
                     (const_int 8)
                     (const_int 9)
                     (const_int 10)
                     (const_int 11)
                     (match_operand 6 "const_12_to_15_operand" "")
                     (match_operand 7 "const_12_to_15_operand" "")
                     (match_operand 8 "const_12_to_15_operand" "")
                     (match_operand 9 "const_12_to_15_operand" "")])))]
  "TARGET_AVX2
   && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
   && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
{
  /* Rebase the selectors from 4-7 to 0-3, pack them into the 8-bit
     immediate and reuse operand 2 to print it.  */
  int mask = 0;
  mask |= (INTVAL (operands[2]) - 4) << 0;
  mask |= (INTVAL (operands[3]) - 4) << 2;
  mask |= (INTVAL (operands[4]) - 4) << 4;
  mask |= (INTVAL (operands[5]) - 4) << 6;
  operands[2] = GEN_INT (mask);

  return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "OI")])
7450
 
7451
;; Expander for 128-bit PSHUFHW taking the raw immediate (operand 2).  The
;; low eight bits are split into four 2-bit selectors, each offset by 4 so
;; that it names one of words 4-7, and handed to sse2_pshufhw_1.
(define_expand "sse2_pshufhw"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V8HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int mask = INTVAL (operands[2]);
  emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
                                 GEN_INT (((mask >> 0) & 3) + 4),
                                 GEN_INT (((mask >> 2) & 3) + 4),
                                 GEN_INT (((mask >> 4) & 3) + 4),
                                 GEN_INT (((mask >> 6) & 3) + 4)));
  DONE;
})
7465
 
7466
;; 128-bit PSHUFHW expressed as an explicit element selection: words 0-3
;; are copied unchanged, while operands 2-5 (each 4-7) choose result
;; words 4-7.
(define_insn "sse2_pshufhw_1"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "xm")
          (parallel [(const_int 0)
                     (const_int 1)
                     (const_int 2)
                     (const_int 3)
                     (match_operand 2 "const_4_to_7_operand" "")
                     (match_operand 3 "const_4_to_7_operand" "")
                     (match_operand 4 "const_4_to_7_operand" "")
                     (match_operand 5 "const_4_to_7_operand" "")])))]
  "TARGET_SSE2"
{
  /* Rebase the selectors from 4-7 to 0-3, pack them into the 8-bit
     immediate and reuse operand 2 to print it.  */
  int mask = 0;
  mask |= (INTVAL (operands[2]) - 4) << 0;
  mask |= (INTVAL (operands[3]) - 4) << 2;
  mask |= (INTVAL (operands[4]) - 4) << 4;
  mask |= (INTVAL (operands[5]) - 4) << 6;
  operands[2] = GEN_INT (mask);

  return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix" "maybe_vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
7495
 
7496
;; Place SImode operand 1 in element 0 of a V4SI register; the other
;; elements are taken from operand 2, which the preparation code sets
;; to the V4SImode zero vector.
(define_expand "sse2_loadd"
  [(set
     (match_operand:V4SI 0 "register_operand" "")
     (vec_merge:V4SI
       (vec_duplicate:V4SI
         (match_operand:SI 1 "nonimmediate_operand" ""))
       (match_dup 2)
       (const_int 1)))]
  "TARGET_SSE"
{
  /* Operand 2 is not supplied by the caller.  */
  operands[2] = CONST0_RTX (V4SImode);
})
7505
 
7506
;; Merge SImode operand 2 into element 0 of V4SI operand 1 (a register
;; or zero, per its predicate).  The first two alternatives emit movd,
;; the next two the legacy movss, and the last the three-operand vmovss.
(define_insn "sse2_loadld"
  [(set
     (match_operand:V4SI 0 "register_operand"       "=x,Yi,x,x,x")
     (vec_merge:V4SI
       (vec_duplicate:V4SI
         (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
       (match_operand:V4SI 1 "reg_or_0_operand"     "C ,C ,C,0,x")
       (const_int 1)))]
  "TARGET_SSE"
  "@
   %vmovd\t{%2, %0|%0, %2}
   %vmovd\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   vmovss\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa"    "sse2,*,noavx,noavx,avx")
   (set_attr "type"   "ssemov")
   (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
   (set_attr "mode"   "TI,TI,V4SF,SF,SF")])
7524
 
7525
;; Extract element 0 of a V4SI register into an SImode destination.
;; Always emitted as "#"; once the split condition holds after reload
;; it becomes a plain SImode move from the same hard register.
(define_insn_and_split "sse2_stored"
  [(set
     (match_operand:SI 0 "nonimmediate_operand" "=xm,r")
     (vec_select:SI
       (match_operand:V4SI 1 "register_operand" "x,Yi")
       (parallel [(const_int 0)])))]
  "TARGET_SSE"
  "#"
  "&& reload_completed
   && (TARGET_INTER_UNIT_MOVES
       || MEM_P (operands [0])
       || !GENERAL_REGNO_P (true_regnum (operands [0])))"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Refer to the source hard register in SImode.  */
  operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
})
7538
 
7539
;; Extract element N (0..3) of a V4SI value that lives in memory.
;; After reload this is split into an SImode load from the same
;; address displaced by 4*N bytes.
(define_insn_and_split "*vec_ext_v4si_mem"
  [(set
     (match_operand:SI 0 "register_operand" "=r")
     (vec_select:SI
       (match_operand:V4SI 1 "memory_operand" "o")
       (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
{
  int elt = INTVAL (operands[2]);
  rtx src = adjust_address (operands[1], SImode, elt * 4);

  emit_move_insn (operands[0], src);
  DONE;
})
7554
 
7555
;; Named expander: store element 0 of a V2DI register into a DImode
;; destination.  It has no preparation code.
(define_expand "sse_storeq"
  [(set
     (match_operand:DI 0 "nonimmediate_operand" "")
     (vec_select:DI
       (match_operand:V2DI 1 "register_operand" "")
       (parallel [(const_int 0)])))]
  "TARGET_SSE")
7561
 
7562
;; 64-bit form of the element-0 DImode extract.  The first two
;; alternatives output "#"; the third reads the low quadword of a
;; memory source with a 64-bit integer move.
(define_insn "*sse2_storeq_rex64"
  [(set
     (match_operand:DI 0 "nonimmediate_operand" "=xm,*r,r")
     (vec_select:DI
       (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
       (parallel [(const_int 0)])))]
  "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   #
   #
   mov{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "*,*,imov")
   (set_attr "mode" "*,*,DI")])
7574
 
7575
;; Element-0 DImode extract from an SSE register; it only ever
;; outputs "#".
(define_insn "*sse2_storeq"
  [(set
     (match_operand:DI 0 "nonimmediate_operand" "=xm")
     (vec_select:DI
       (match_operand:V2DI 1 "register_operand" "x")
       (parallel [(const_int 0)])))]
  "TARGET_SSE"
  "#")
7582
 
7583
;; After reload, turn the element-0 DImode extract from a V2DI
;; register into an ordinary DImode move from the same hard register,
;; provided the condition below holds.
(define_split
  [(set
     (match_operand:DI 0 "nonimmediate_operand" "")
     (vec_select:DI
       (match_operand:V2DI 1 "register_operand" "")
       (parallel [(const_int 0)])))]
  "TARGET_SSE
   && reload_completed
   && (TARGET_INTER_UNIT_MOVES
       || MEM_P (operands [0])
       || !GENERAL_REGNO_P (true_regnum (operands [0])))"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Refer to the source hard register in DImode.  */
  operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
})
7595
 
7596
;; Extract element 1 (the high quadword) of a V2DI value, 64-bit
;; targets.  Alternatives, in order: store the high half to memory,
;; shift the register right by 8 bytes (legacy and VEX forms), load
;; the high quadword of a memory source into an SSE register, and
;; load it into a general register.  %H1 addresses the high half of
;; the memory operand.
(define_insn "*vec_extractv2di_1_rex64"
  [(set
     (match_operand:DI 0 "nonimmediate_operand"     "=m,x,x,x,r")
     (vec_select:DI
       (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,o")
       (parallel [(const_int 1)])))]
  "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   %vmovhps\t{%1, %0|%0, %1}
   psrldq\t{$8, %0|%0, 8}
   vpsrldq\t{$8, %1, %0|%0, %1, 8}
   %vmovq\t{%H1, %0|%0, %H1}
   mov{q}\t{%H1, %0|%0, %H1}"
  [(set_attr "isa"              "*,noavx,avx,*,*")
   (set_attr "type"             "ssemov,sseishft1,sseishft1,ssemov,imov")
   (set_attr "length_immediate" "*,1,1,*,*")
   (set_attr "memory"           "*,none,none,*,*")
   (set_attr "prefix"           "maybe_vex,orig,vex,maybe_vex,orig")
   (set_attr "mode"             "V2SF,TI,TI,TI,DI")])
7614
 
7615
;; Extract element 1 (the high quadword) of a V2DI value, 32-bit
;; targets.  The first four alternatives mirror the 64-bit pattern's
;; SSE alternatives; the last two are movhlps from a register and
;; movlps from the high half of a memory source, both marked noavx.
(define_insn "*vec_extractv2di_1"
  [(set
     (match_operand:DI 0 "nonimmediate_operand"     "=m,x,x,x,x,x")
     (vec_select:DI
       (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,x,o")
       (parallel [(const_int 1)])))]
  "!TARGET_64BIT && TARGET_SSE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   %vmovhps\t{%1, %0|%0, %1}
   psrldq\t{$8, %0|%0, 8}
   vpsrldq\t{$8, %1, %0|%0, %1, 8}
   %vmovq\t{%H1, %0|%0, %H1}
   movhlps\t{%1, %0|%0, %1}
   movlps\t{%H1, %0|%0, %H1}"
  [(set_attr "isa"              "*,sse2_noavx,avx,sse2,noavx,noavx")
   (set_attr "type"             "ssemov,sseishft1,sseishft1,ssemov,ssemov,ssemov")
   (set_attr "length_immediate" "*,1,1,*,*,*")
   (set_attr "memory"           "*,none,none,*,*,*")
   (set_attr "prefix"           "maybe_vex,orig,vex,maybe_vex,orig,orig")
   (set_attr "mode"             "V2SF,TI,TI,TI,V4SF,V2SF")])
7635
 
7636
;; Broadcast an SImode value to all four elements of a V4SI register:
;; pshufd from a register (sse2), vbroadcastss from memory (avx), or
;; an in-place shufps when the value is already in the destination.
(define_insn "*vec_dupv4si"
  [(set
     (match_operand:V4SI 0 "register_operand"     "=x,x,x")
     (vec_duplicate:V4SI
       (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
  "TARGET_SSE"
  "@
   %vpshufd\t{$0, %1, %0|%0, %1, 0}
   vbroadcastss\t{%1, %0|%0, %1}
   shufps\t{$0, %0, %0|%0, %0, 0}"
  [(set_attr "isa"              "sse2,avx,noavx")
   (set_attr "type"             "sselog1,ssemov,sselog1")
   (set_attr "length_immediate" "1,0,1")
   (set_attr "prefix_extra"     "0,1,*")
   (set_attr "prefix"           "maybe_vex,vex,orig")
   (set_attr "mode"             "TI,V4SF,V4SF")])
7651
 
7652
;; Broadcast a DImode value to both elements of a V2DI register.
;; Alternatives 0 and 3 work in place on the destination (punpcklqdq
;; and movlhps), alternative 1 is the VEX unpack of a separate source
;; register, alternative 2 is movddup from memory.
(define_insn "*vec_dupv2di"
  [(set
     (match_operand:V2DI 0 "register_operand"     "=x,x,x,x")
     (vec_duplicate:V2DI
       (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
  "TARGET_SSE"
  "@
   punpcklqdq\t%0, %0
   vpunpcklqdq\t{%d1, %0|%0, %d1}
   %vmovddup\t{%1, %0|%0, %1}
   movlhps\t%0, %0"
  [(set_attr "isa"    "sse2_noavx,avx,sse3,noavx")
   (set_attr "type"   "sselog1,sselog1,sselog1,ssemov")
   (set_attr "prefix" "orig,vex,maybe_vex,orig")
   (set_attr "mode"   "TI,TI,DF,V4SF")])
7666
 
7667
;; Concatenate two SImode values into a V2SI register when SSE4.1 is
;; available.  Alternatives 0-1 insert operand 2 with (v)pinsrd,
;; 2-3 interleave two SSE registers with (v)punpckldq, 4 is a movd of
;; operand 1 alone, and 5-6 are the MMX-register counterparts.
(define_insn "*vec_concatv2si_sse4_1"
  [(set
     (match_operand:V2SI 0 "register_operand"     "=x, x,x,x, x, *y,*y")
     (vec_concat:V2SI
       (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm,  0,rm")
       (match_operand:SI 2 "vector_move_operand"  "rm,rm,x,x, C,*ym, C")))]
  "TARGET_SSE4_1"
  "@
   pinsrd\t{$1, %2, %0|%0, %2, 1}
   vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
   punpckldq\t{%2, %0|%0, %2}
   vpunpckldq\t{%2, %1, %0|%0, %1, %2}
   %vmovd\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "isa"              "noavx,avx,noavx,avx,*,*,*")
   (set_attr "type"             "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "prefix_extra"     "1,1,*,*,*,*,*")
   (set_attr "length_immediate" "1,1,*,*,*,*,*")
   (set_attr "prefix"           "orig,vex,orig,vex,maybe_vex,orig,orig")
   (set_attr "mode"             "TI,TI,TI,TI,TI,DI,DI")])
7687
 
7688
;; ??? The MMX alternative could in principle accept a memory operand 2.
;; However, using nonimmediate_operand for operand 2 while the SSE
;; alternatives still reject memory would pretty much force the MMX
;; alternative to be chosen, so operand 2 stays reg_or_0_operand.
;;
;; Concatenate two SImode values into a V2SI register (SSE2).
(define_insn "*vec_concatv2si_sse2"
  [(set
     (match_operand:V2SI 0 "register_operand"     "=x,x ,*y,*y")
     (vec_concat:V2SI
       (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
       (match_operand:SI 2 "reg_or_0_operand"     " x,C ,*y, C")))]
  "TARGET_SSE2"
  "@
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "TI,TI,DI,DI")])
7704
 
7705
;; Concatenate two SImode values into a V2SI register using only SSE1
;; instructions for the SSE-register alternatives (unpcklps, movss);
;; the last two alternatives use MMX registers.
(define_insn "*vec_concatv2si_sse"
  [(set
     (match_operand:V2SI 0 "register_operand"     "=x,x,*y,*y")
     (vec_concat:V2SI
       (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
       (match_operand:SI 2 "reg_or_0_operand"     " x,C,*y,C")))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   movss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "V4SF,V4SF,DI,DI")])
7718
 
7719
;; Concatenate two V2SI halves into a V4SI register.  Operand 1 is
;; the low half and must be a register; operand 2 is the high half
;; and may be in memory for the (v)movhps alternatives.
(define_insn "*vec_concatv4si"
  [(set
     (match_operand:V4SI 0 "register_operand"       "=x,x,x,x,x")
     (vec_concat:V4SI
       (match_operand:V2SI 1 "register_operand"     " 0,x,0,0,x")
       (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
  "TARGET_SSE"
  "@
   punpcklqdq\t{%2, %0|%0, %2}
   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa"    "sse2_noavx,avx,noavx,noavx,avx")
   (set_attr "type"   "sselog,sselog,ssemov,ssemov,ssemov")
   (set_attr "prefix" "orig,vex,orig,orig,vex")
   (set_attr "mode"   "TI,TI,V4SF,V2SF,V2SF")])
7735
 
7736
;; Concatenate two DImode values into a V2DI register, 64-bit targets.
;; Alternative 3 (general register source) is written with movd rather
;; than movq: movd instead of movq is required to handle broken
;; assemblers.
(define_insn "*vec_concatv2di_rex64"
  [(set
     (match_operand:V2DI 0 "register_operand"
       "=x,x ,x ,Yi,!x,x,x,x,x")
     (vec_concat:V2DI
       (match_operand:DI 1 "nonimmediate_operand"
       " 0,x ,xm,r ,*y,0,x,0,x")
       (match_operand:DI 2 "vector_move_operand"
       "rm,rm,C ,C ,C ,x,x,m,m")))]
  "TARGET_64BIT"
  "@
   pinsrq\t{$1, %2, %0|%0, %2, 1}
   vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
   %vmovq\t{%1, %0|%0, %1}
   %vmovd\t{%1, %0|%0, %1}
   movq2dq\t{%1, %0|%0, %1}
   punpcklqdq\t{%2, %0|%0, %2}
   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
   movhps\t{%2, %0|%0, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "sse4_noavx,avx,*,*,*,noavx,avx,noavx,avx")
   ;; The pinsrq and punpcklqdq alternatives are "sselog"; the rest
   ;; are plain moves.
   (set (attr "type")
     (if_then_else (eq_attr "alternative" "0,1,5,6")
       (const_string "sselog")
       (const_string "ssemov")))
   ;; Alternatives 0 and 3 are given a REX prefix unless AVX is enabled.
   (set (attr "prefix_rex")
     (if_then_else (and (eq_attr "alternative" "0,3")
                        (not (match_test "TARGET_AVX")))
       (const_string "1")
       (const_string "*")))
   (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*")
   (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*")
   (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex")
   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V2SF,V2SF")])
7772
 
7773
;; Concatenate two DImode values into a V2DI register, 32-bit targets.
;; Alternatives 0-1 move operand 1 alone (from an SSE register or
;; memory, or from an MMX register); the remaining alternatives
;; combine two halves with (v)punpcklqdq, movlhps or (v)movhps.
(define_insn "vec_concatv2di"
  [(set
     (match_operand:V2DI 0 "register_operand"     "=x,?x,x,x,x,x,x")
     (vec_concat:V2DI
       (match_operand:DI 1 "nonimmediate_operand" "xm,*y,0,x,0,0,x")
       (match_operand:DI 2 "vector_move_operand"  " C, C,x,x,x,m,m")))]
  "!TARGET_64BIT && TARGET_SSE"
  "@
   %vmovq\t{%1, %0|%0, %1}
   movq2dq\t{%1, %0|%0, %1}
   punpcklqdq\t{%2, %0|%0, %2}
   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa"    "sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
   (set_attr "type"   "ssemov,ssemov,sselog,sselog,ssemov,ssemov,ssemov")
   (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex")
   (set_attr "mode"   "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
7791
 
7792
;; Signed unpack of the low half of operand 1 into the wider-element
;; vector operand 0; all work is done by ix86_expand_sse_unpack
;; (arguments: unsigned = false, high = false).
;; NOTE(review): the "<mode>" suffix and the "<sseunpackmode>" operand
;; mode had been stripped from this text; they are restored here from
;; the upstream pattern -- confirm that the sseunpackmode mode attribute
;; is defined earlier in this file.
(define_expand "vec_unpacks_lo_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand" "")
   (match_operand:VI124_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
  "ix86_expand_sse_unpack (operands, false, false); DONE;")
7797
 
7798
;; Signed unpack of the high half of operand 1 into the wider-element
;; vector operand 0; all work is done by ix86_expand_sse_unpack
;; (arguments: unsigned = false, high = true).
;; NOTE(review): the "<mode>" suffix and the "<sseunpackmode>" operand
;; mode had been stripped from this text; they are restored here from
;; the upstream pattern -- confirm that the sseunpackmode mode attribute
;; is defined earlier in this file.
(define_expand "vec_unpacks_hi_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand" "")
   (match_operand:VI124_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
  "ix86_expand_sse_unpack (operands, false, true); DONE;")
7803
 
7804
;; Unsigned unpack of the low half of operand 1 into the wider-element
;; vector operand 0; all work is done by ix86_expand_sse_unpack
;; (arguments: unsigned = true, high = false).
;; NOTE(review): the "<mode>" suffix and the "<sseunpackmode>" operand
;; mode had been stripped from this text; they are restored here from
;; the upstream pattern -- confirm that the sseunpackmode mode attribute
;; is defined earlier in this file.
(define_expand "vec_unpacku_lo_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand" "")
   (match_operand:VI124_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
  "ix86_expand_sse_unpack (operands, true, false); DONE;")
7809
 
7810
;; Unsigned unpack of the high half of operand 1 into the wider-element
;; vector operand 0; all work is done by ix86_expand_sse_unpack
;; (arguments: unsigned = true, high = true).
;; NOTE(review): the "<mode>" suffix and the "<sseunpackmode>" operand
;; mode had been stripped from this text; they are restored here from
;; the upstream pattern -- confirm that the sseunpackmode mode attribute
;; is defined earlier in this file.
(define_expand "vec_unpacku_hi_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand" "")
   (match_operand:VI124_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
  "ix86_expand_sse_unpack (operands, true, true); DONE;")
7815
 
7816
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7817
;;
7818
;; Miscellaneous
7819
;;
7820
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7821
 
7822
;; Unsigned rounding average of 32 bytes: zero-extend both inputs,
;; add them, add 1 to every element, shift right by one and truncate
;; back to bytes.  The preparation code only legitimizes the operands
;; for a commutative PLUS.
(define_expand "avx2_uavgv32qi3"
  [(set
     (match_operand:V32QI 0 "register_operand" "")
     (truncate:V32QI
       (lshiftrt:V32HI
         (plus:V32HI
           (plus:V32HI
             (zero_extend:V32HI
               (match_operand:V32QI 1 "nonimmediate_operand" ""))
             (zero_extend:V32HI
               (match_operand:V32QI 2 "nonimmediate_operand" "")))
           (const_vector:V32QI
             [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
              (const_int 1) (const_int 1) (const_int 1) (const_int 1)
              (const_int 1) (const_int 1) (const_int 1) (const_int 1)
              (const_int 1) (const_int 1) (const_int 1) (const_int 1)
              (const_int 1) (const_int 1) (const_int 1) (const_int 1)
              (const_int 1) (const_int 1) (const_int 1) (const_int 1)
              (const_int 1) (const_int 1) (const_int 1) (const_int 1)
              (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
         (const_int 1))))]
  "TARGET_AVX2"
{
  ix86_fixup_binary_operands_no_copy (PLUS, V32QImode, operands);
})
7851
 
7852
;; Unsigned rounding average of 16 bytes: zero-extend both inputs,
;; add them, add 1 to every element, shift right by one and truncate
;; back to bytes.  The preparation code only legitimizes the operands
;; for a commutative PLUS.
(define_expand "sse2_uavgv16qi3"
  [(set
     (match_operand:V16QI 0 "register_operand" "")
     (truncate:V16QI
       (lshiftrt:V16HI
         (plus:V16HI
           (plus:V16HI
             (zero_extend:V16HI
               (match_operand:V16QI 1 "nonimmediate_operand" ""))
             (zero_extend:V16HI
               (match_operand:V16QI 2 "nonimmediate_operand" "")))
           (const_vector:V16QI
             [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
              (const_int 1) (const_int 1) (const_int 1) (const_int 1)
              (const_int 1) (const_int 1) (const_int 1) (const_int 1)
              (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
         (const_int 1))))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);
})
7873
 
7874
;; VPAVGB on 256-bit vectors: matches the zero-extend / add / add-one /
;; shift / truncate form of the byte rounding average.  Operands 1 and
;; 2 commute ("%").
;; NOTE(review): the vector of ones is written in V32QImode although
;; it is an operand of a V32HImode plus; the RTL generated for this
;; operation must use the same form for the pattern to match.
(define_insn "*avx2_uavgv32qi3"
  [(set
     (match_operand:V32QI 0 "register_operand" "=x")
     (truncate:V32QI
       (lshiftrt:V32HI
         (plus:V32HI
           (plus:V32HI
             (zero_extend:V32HI
               (match_operand:V32QI 1 "nonimmediate_operand" "%x"))
             (zero_extend:V32HI
               (match_operand:V32QI 2 "nonimmediate_operand" "xm")))
           (const_vector:V32QI
             [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
              (const_int 1) (const_int 1) (const_int 1) (const_int 1)
              (const_int 1) (const_int 1) (const_int 1) (const_int 1)
              (const_int 1) (const_int 1) (const_int 1) (const_int 1)
              (const_int 1) (const_int 1) (const_int 1) (const_int 1)
              (const_int 1) (const_int 1) (const_int 1) (const_int 1)
              (const_int 1) (const_int 1) (const_int 1) (const_int 1)
              (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
         (const_int 1))))]
  "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V32QImode, operands)"
  "vpavgb\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type"   "sseiadd")
   (set_attr "prefix" "vex")
   (set_attr "mode"   "OI")])
7906
 
7907
;; PAVGB / VPAVGB on 128-bit vectors: matches the zero-extend / add /
;; add-one / shift / truncate form of the byte rounding average.
;; Alternative 0 is the two-operand legacy form (operand 1 tied to the
;; destination), alternative 1 the three-operand VEX form.
;; NOTE(review): the vector of ones is written in V16QImode although
;; it is an operand of a V16HImode plus; the RTL generated for this
;; operation must use the same form for the pattern to match.
(define_insn "*sse2_uavgv16qi3"
  [(set
     (match_operand:V16QI 0 "register_operand" "=x,x")
     (truncate:V16QI
       (lshiftrt:V16HI
         (plus:V16HI
           (plus:V16HI
             (zero_extend:V16HI
               (match_operand:V16QI 1 "nonimmediate_operand" "%0,x"))
             (zero_extend:V16HI
               (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))
           (const_vector:V16QI
             [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
              (const_int 1) (const_int 1) (const_int 1) (const_int 1)
              (const_int 1) (const_int 1) (const_int 1) (const_int 1)
              (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
         (const_int 1))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
  "@
   pavgb\t{%2, %0|%0, %2}
   vpavgb\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa"           "noavx,avx")
   (set_attr "type"          "sseiadd")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix"        "orig,vex")
   (set_attr "mode"          "TI")])
7935
 
7936
;; Unsigned rounding average of 16 halfwords: zero-extend both inputs,
;; add them, add 1 to every element, shift right by one and truncate
;; back to halfwords.  The preparation code only legitimizes the
;; operands for a commutative PLUS.
(define_expand "avx2_uavgv16hi3"
  [(set
     (match_operand:V16HI 0 "register_operand" "")
     (truncate:V16HI
       (lshiftrt:V16SI
         (plus:V16SI
           (plus:V16SI
             (zero_extend:V16SI
               (match_operand:V16HI 1 "nonimmediate_operand" ""))
             (zero_extend:V16SI
               (match_operand:V16HI 2 "nonimmediate_operand" "")))
           (const_vector:V16HI
             [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
              (const_int 1) (const_int 1) (const_int 1) (const_int 1)
              (const_int 1) (const_int 1) (const_int 1) (const_int 1)
              (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
         (const_int 1))))]
  "TARGET_AVX2"
{
  ix86_fixup_binary_operands_no_copy (PLUS, V16HImode, operands);
})
7957
 
7958
;; Unsigned rounding average of 8 halfwords: zero-extend both inputs,
;; add them, add 1 to every element, shift right by one and truncate
;; back to halfwords.  The preparation code only legitimizes the
;; operands for a commutative PLUS.
(define_expand "sse2_uavgv8hi3"
  [(set
     (match_operand:V8HI 0 "register_operand" "")
     (truncate:V8HI
       (lshiftrt:V8SI
         (plus:V8SI
           (plus:V8SI
             (zero_extend:V8SI
               (match_operand:V8HI 1 "nonimmediate_operand" ""))
             (zero_extend:V8SI
               (match_operand:V8HI 2 "nonimmediate_operand" "")))
           (const_vector:V8HI
             [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
              (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
         (const_int 1))))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);
})
7975
 
7976
;; VPAVGW on 256-bit vectors: matches the zero-extend / add / add-one /
;; shift / truncate form of the halfword rounding average.  Operands 1
;; and 2 commute ("%").
;; NOTE(review): the vector of ones is written in V16HImode although
;; it is an operand of a V16SImode plus; the RTL generated for this
;; operation must use the same form for the pattern to match.
(define_insn "*avx2_uavgv16hi3"
  [(set
     (match_operand:V16HI 0 "register_operand" "=x")
     (truncate:V16HI
       (lshiftrt:V16SI
         (plus:V16SI
           (plus:V16SI
             (zero_extend:V16SI
               (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
             (zero_extend:V16SI
               (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
           (const_vector:V16HI
             [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
              (const_int 1) (const_int 1) (const_int 1) (const_int 1)
              (const_int 1) (const_int 1) (const_int 1) (const_int 1)
              (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
         (const_int 1))))]
  "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V16HImode, operands)"
  "vpavgw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type"   "sseiadd")
   (set_attr "prefix" "vex")
   (set_attr "mode"   "OI")])
8000
 
8001
;; PAVGW / VPAVGW on 128-bit vectors: matches the zero-extend / add /
;; add-one / shift / truncate form of the halfword rounding average.
;; Alternative 0 is the two-operand legacy form (operand 1 tied to the
;; destination), alternative 1 the three-operand VEX form.
;; NOTE(review): the vector of ones is written in V8HImode although
;; it is an operand of a V8SImode plus; the RTL generated for this
;; operation must use the same form for the pattern to match.
(define_insn "*sse2_uavgv8hi3"
  [(set
     (match_operand:V8HI 0 "register_operand" "=x,x")
     (truncate:V8HI
       (lshiftrt:V8SI
         (plus:V8SI
           (plus:V8SI
             (zero_extend:V8SI
               (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
             (zero_extend:V8SI
               (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
           (const_vector:V8HI
             [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
              (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
         (const_int 1))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
  "@
   pavgw\t{%2, %0|%0, %2}
   vpavgw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa"           "noavx,avx")
   (set_attr "type"          "sseiadd")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix"        "orig,vex")
   (set_attr "mode"          "TI")])
8025
 
8026
;; The correct representation for this is absolutely enormous, and
;; surely not generally useful, so PSADBW is modelled as an unspec of
;; its two inputs.  Alternative 0 is the two-operand legacy form,
;; alternative 1 the three-operand VEX form.
;; NOTE(review): the "<sse2_avx2>" name prefix, the "<ssebytemode>"
;; input modes and the "<sseinsnmode>" mode attribute had been stripped
;; from this text; they are restored here from the upstream pattern --
;; confirm that these mode attributes are defined earlier in this file.
(define_insn "<sse2_avx2>_psadbw"
  [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
        (unspec:VI8_AVX2
          [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
           (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
          UNSPEC_PSADBW))]
  "TARGET_SSE2"
  "@
   psadbw\t{%2, %0|%0, %2}
   vpsadbw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
8043
 
8044
;; MOVMSKPS / MOVMSKPD: produce an SImode value from a floating-point
;; vector register via UNSPEC_MOVMSK.  The pattern is instantiated for
;; every mode of the VF iterator.
;; NOTE(review): the iterator placeholders in the name, in the mnemonic
;; suffix and in the mode attribute had been stripped from this text;
;; they are restored here from the upstream pattern -- confirm that
;; <sse>, <ssemodesuffix> and <avxsizesuffix> are defined earlier in
;; this file.
(define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
          [(match_operand:VF 1 "register_operand" "x")]
          UNSPEC_MOVMSK))]
  "TARGET_SSE"
  "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
8054
 
8055
;; VPMOVMSKB on a 256-bit source: produce an SImode value in a general
;; register from a V32QI register via UNSPEC_MOVMSK.
;; The "mode" attribute is SI: the destination is SImode, and this
;; keeps the pattern consistent with sse2_pmovmskb, which reports SI
;; for the same operation on a 128-bit source (it was "DI" here).
(define_insn "avx2_pmovmskb"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
          [(match_operand:V32QI 1 "register_operand" "x")]
          UNSPEC_MOVMSK))]
  "TARGET_AVX2"
  "vpmovmskb\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "SI")])
8064
 
8065
;; PMOVMSKB on a 128-bit source: produce an SImode value in a general
;; register from a V16QI register via UNSPEC_MOVMSK.  The "%v" prefix
;; in the template selects the VEX mnemonic when applicable.
(define_insn "sse2_pmovmskb"
  [(set
     (match_operand:SI 0 "register_operand" "=r")
     (unspec:SI
       [(match_operand:V16QI 1 "register_operand" "x")]
       UNSPEC_MOVMSK))]
  "TARGET_SSE2"
  "%vpmovmskb\t{%1, %0|%0, %1}"
  [(set_attr "type"          "ssemov")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix"        "maybe_vex")
   (set_attr "mode"          "SI")])
8075
 
8076
;; Named expander for MASKMOVDQU: the V16QI memory destination
;; (operand 0) is set to an unspec of the two register operands and of
;; the destination's own previous contents.  No preparation code.
(define_expand "sse2_maskmovdqu"
  [(set
     (match_operand:V16QI 0 "memory_operand" "")
     (unspec:V16QI
       [(match_operand:V16QI 1 "register_operand" "")
        (match_operand:V16QI 2 "register_operand" "")
        (match_dup 0)]
       UNSPEC_MASKMOV))]
  "TARGET_SSE2")
8083
 
8084
;; MASKMOVDQU proper.  The store address is operand 0, a pointer-mode
;; register restricted by the "D" constraint; it does not appear in
;; the assembler template, which names only operands 1 and 2.
(define_insn "*sse2_maskmovdqu"
  [(set
     (mem:V16QI (match_operand:P 0 "register_operand" "D"))
     (unspec:V16QI
       [(match_operand:V16QI 1 "register_operand" "x")
        (match_operand:V16QI 2 "register_operand" "x")
        (mem:V16QI (match_dup 0))]
       UNSPEC_MASKMOV))]
  "TARGET_SSE2"
  "%vmaskmovdqu\t{%2, %1|%1, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_data16" "1")
   ;; The default length_vex computation is confused by the implicit
   ;; %rdi operand, so the VEX length is spelled out explicitly.
   (set (attr "length_vex")
     (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
8099
 
8100
;; LDMXCSR: volatile unspec taking an SImode memory operand, emitted
;; as (v)ldmxcsr on that operand.
(define_insn "sse_ldmxcsr"
  [(unspec_volatile
     [(match_operand:SI 0 "memory_operand" "m")]
     UNSPECV_LDMXCSR)]
  "TARGET_SSE"
  "%vldmxcsr\t%0"
  [(set_attr "type"          "sse")
   (set_attr "atom_sse_attr" "mxcsr")
   (set_attr "prefix"        "maybe_vex")
   (set_attr "memory"        "load")])
8109
 
8110
;; STMXCSR: set an SImode memory operand from a volatile unspec,
;; emitted as (v)stmxcsr on that operand.
(define_insn "sse_stmxcsr"
  [(set
     (match_operand:SI 0 "memory_operand" "=m")
     (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
  "TARGET_SSE"
  "%vstmxcsr\t%0"
  [(set_attr "type"          "sse")
   (set_attr "atom_sse_attr" "mxcsr")
   (set_attr "prefix"        "maybe_vex")
   (set_attr "memory"        "store")])
8119
 
8120
;; CLFLUSH: volatile unspec on an address operand; "%a0" prints
;; operand 0 as a memory address.
(define_insn "sse2_clflush"
  [(unspec_volatile
     [(match_operand 0 "address_operand" "p")]
     UNSPECV_CLFLUSH)]
  "TARGET_SSE2"
  "clflush\t%a0"
  [(set_attr "type"          "sse")
   (set_attr "atom_sse_attr" "fence")
   (set_attr "memory"        "unknown")])
8128
 
8129
 
8130
;; MWAIT.  Both operands are SImode registers pinned by the "a" and
;; "c" constraints; the instruction is emitted without operands.
;;
;; The 64-bit form is "mwait %rax,%rcx", but only the lower 32 bits
;; are used.  Because 32-bit register operands are implicitly zero
;; extended to 64 bits, setting up the 32-bit registers is sufficient.
(define_insn "sse3_mwait"
  [(unspec_volatile
     [(match_operand:SI 0 "register_operand" "a")
      (match_operand:SI 1 "register_operand" "c")]
     UNSPECV_MWAIT)]
  "TARGET_SSE3"
  "mwait"
  [(set_attr "length" "3")])
8140
 
8141
;; MONITOR, 32-bit targets.  The three SImode operands are pinned by
;; the "a", "c" and "d" constraints and are printed explicitly in the
;; template.
(define_insn "sse3_monitor"
  [(unspec_volatile
     [(match_operand:SI 0 "register_operand" "a")
      (match_operand:SI 1 "register_operand" "c")
      (match_operand:SI 2 "register_operand" "d")]
     UNSPECV_MONITOR)]
  "TARGET_SSE3 && !TARGET_64BIT"
  "monitor\t%0, %1, %2"
  [(set_attr "length" "3")])
8149
 
8150
;; MONITOR, 64-bit targets.  Operand 0 is a DImode register ("a");
;; operands 1 and 2 stay SImode ("c" and "d").  The instruction is
;; emitted without operands.
;;
;; The 64-bit form is "monitor %rax,%rcx,%rdx", but only the lower
;; 32 bits of RCX and RDX are used.  Because 32-bit register operands
;; are implicitly zero extended to 64 bits, setting up the 32-bit
;; registers is sufficient.
(define_insn "sse3_monitor64"
  [(unspec_volatile
     [(match_operand:DI 0 "register_operand" "a")
      (match_operand:SI 1 "register_operand" "c")
      (match_operand:SI 2 "register_operand" "d")]
     UNSPECV_MONITOR)]
  "TARGET_SSE3 && TARGET_64BIT"
  "monitor"
  [(set_attr "length" "3")])
8161
 
8162
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8163
;;
8164
;; SSSE3 instructions
8165
;;
8166
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8167
 
8168
;; 256-bit horizontal add of adjacent 16-bit elements (vphaddw on V16HI).
;; The 256-bit form operates on each 128-bit lane independently, so each
;; half of the result holds the four pair sums taken from that lane of
;; operand 1 followed by the four pair sums from the same lane of
;; operand 2.  The vec_concat tree below is grouped per lane to describe
;; exactly that element order.
(define_insn "avx2_phaddwv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_concat:V16HI
          ;; Low lane: elements 0-7 of operand 1, then of operand 2.
          (vec_concat:V8HI
            (vec_concat:V4HI
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI
                    (match_operand:V16HI 1 "register_operand" "x")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
                (plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
                (plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
            (vec_concat:V4HI
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI
                    (match_operand:V16HI 2 "nonimmediate_operand" "xm")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
                (plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
                (plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))))
          ;; High lane: elements 8-15 of operand 1, then of operand 2.
          (vec_concat:V8HI
            (vec_concat:V4HI
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
                (plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
                (plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))
            (vec_concat:V4HI
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
                (plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
                (plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
  "TARGET_AVX2"
  "vphaddw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
8243
 
8244
;; 128-bit horizontal add of adjacent 16-bit elements (V8HI).
;; Result elements 0-3 are the pair sums of operand 1, elements 4-7 the
;; pair sums of operand 2.  Alternative 0 ties operand 1 to the
;; destination and emits two-operand phaddw (isa "noavx"); alternative 1
;; emits three-operand vphaddw (isa "avx").
(define_insn "ssse3_phaddwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_concat:V8HI
          ;; Pair sums taken from operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (plus:HI (vec_select:HI
                         (match_operand:V8HI 1 "register_operand" "0,x")
                         (parallel [(const_int 0)]))
                       (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                       (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                       (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                       (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Pair sums taken from operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (plus:HI (vec_select:HI
                         (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
                         (parallel [(const_int 0)]))
                       (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (plus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                       (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (plus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                       (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (plus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                       (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "@
   phaddw\t{%2, %0|%0, %2}
   vphaddw\t{%2, %1, %0|%0, %1, %2}"
  ;; Comma-separated attribute values are per alternative.
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
8292
 
8293
;; 64-bit horizontal add of adjacent 16-bit elements (V4HI, "y"
;; register constraint).  Operand 1 is tied to the destination; result
;; elements 0-1 are its pair sums and elements 2-3 those of operand 2.
(define_insn "ssse3_phaddwv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; Pair sums taken from operand 1.
          (vec_concat:V2HI
            (plus:HI (vec_select:HI
                       (match_operand:V4HI 1 "register_operand" "0")
                       (parallel [(const_int 0)]))
                     (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                     (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Pair sums taken from operand 2.
          (vec_concat:V2HI
            (plus:HI (vec_select:HI
                       (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                       (parallel [(const_int 0)]))
                     (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (plus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                     (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phaddw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   ;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
8321
 
8322
;; 256-bit horizontal add of adjacent 32-bit elements (vphaddd on V8SI).
;; The 256-bit form operates on each 128-bit lane independently, so each
;; half of the result holds the two pair sums taken from that lane of
;; operand 1 followed by the two pair sums from the same lane of
;; operand 2.  The vec_concat tree below is grouped per lane to describe
;; exactly that element order.
(define_insn "avx2_phadddv8si3"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (vec_concat:V8SI
          ;; Low lane: elements 0-3 of operand 1, then of operand 2.
          (vec_concat:V4SI
            (vec_concat:V2SI
              (plus:SI
                (vec_select:SI
                  (match_operand:V8SI 1 "register_operand" "x")
                  (parallel [(const_int 0)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
              (plus:SI
                (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2SI
              (plus:SI
                (vec_select:SI
                  (match_operand:V8SI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
              (plus:SI
                (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 3)])))))
          ;; High lane: elements 4-7 of operand 1, then of operand 2.
          (vec_concat:V4SI
            (vec_concat:V2SI
              (plus:SI
                (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
              (plus:SI
                (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 7)]))))
            (vec_concat:V2SI
              (plus:SI
                (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
              (plus:SI
                (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_AVX2"
  "vphaddd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
8365
 
8366
;; 128-bit horizontal add of adjacent 32-bit elements (V4SI).
;; Result elements 0-1 are the pair sums of operand 1, elements 2-3 the
;; pair sums of operand 2.  Alternative 0 ties operand 1 to the
;; destination and emits two-operand phaddd (isa "noavx"); alternative 1
;; emits three-operand vphaddd (isa "avx").
(define_insn "ssse3_phadddv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
        (vec_concat:V4SI
          ;; Pair sums taken from operand 1.
          (vec_concat:V2SI
            (plus:SI (vec_select:SI
                       (match_operand:V4SI 1 "register_operand" "0,x")
                       (parallel [(const_int 0)]))
                     (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
            (plus:SI (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
                     (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Pair sums taken from operand 2.
          (vec_concat:V2SI
            (plus:SI (vec_select:SI
                       (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
                       (parallel [(const_int 0)]))
                     (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
            (plus:SI (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
                     (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "@
   phaddd\t{%2, %0|%0, %2}
   vphaddd\t{%2, %1, %0|%0, %1, %2}"
  ;; Comma-separated attribute values are per alternative.
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
8398
 
8399
;; 64-bit horizontal add of 32-bit elements (V2SI, "y" register
;; constraint).  Operand 1 is tied to the destination; result element 0
;; is the sum of its two elements and element 1 the sum of operand 2's.
(define_insn "ssse3_phadddv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_concat:V2SI
          ;; Sum of the two elements of operand 1.
          (plus:SI (vec_select:SI
                     (match_operand:V2SI 1 "register_operand" "0")
                     (parallel [(const_int 0)]))
                   (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
          ;; Sum of the two elements of operand 2.
          (plus:SI (vec_select:SI
                     (match_operand:V2SI 2 "nonimmediate_operand" "ym")
                     (parallel [(const_int 0)]))
                   (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSSE3"
  "phaddd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   ;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
8419
 
8420
;; 256-bit horizontal add with signed saturation (ss_plus) of adjacent
;; 16-bit elements (vphaddsw on V16HI).
;; The 256-bit form operates on each 128-bit lane independently, so each
;; half of the result holds the four pair sums taken from that lane of
;; operand 1 followed by the four pair sums from the same lane of
;; operand 2.  The vec_concat tree below is grouped per lane to describe
;; exactly that element order.
(define_insn "avx2_phaddswv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_concat:V16HI
          ;; Low lane: elements 0-7 of operand 1, then of operand 2.
          (vec_concat:V8HI
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ss_plus:HI
                  (vec_select:HI
                    (match_operand:V16HI 1 "register_operand" "x")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (ss_plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ss_plus:HI
                  (vec_select:HI
                    (match_operand:V16HI 2 "nonimmediate_operand" "xm")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (ss_plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))))
          ;; High lane: elements 8-15 of operand 1, then of operand 2.
          (vec_concat:V8HI
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ss_plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (ss_plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ss_plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (ss_plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
  "TARGET_AVX2"
  "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
8495
 
8496
;; 128-bit horizontal add with signed saturation (ss_plus) of adjacent
;; 16-bit elements (V8HI).  Result elements 0-3 are the pair sums of
;; operand 1, elements 4-7 the pair sums of operand 2.  Alternative 0
;; ties operand 1 to the destination and emits two-operand phaddsw
;; (isa "noavx"); alternative 1 emits three-operand vphaddsw (isa "avx").
(define_insn "ssse3_phaddswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_concat:V8HI
          ;; Saturated pair sums taken from operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_plus:HI (vec_select:HI
                            (match_operand:V8HI 1 "register_operand" "0,x")
                            (parallel [(const_int 0)]))
                          (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (ss_plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                          (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                          (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (ss_plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                          (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Saturated pair sums taken from operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_plus:HI (vec_select:HI
                            (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
                            (parallel [(const_int 0)]))
                          (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (ss_plus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                          (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_plus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                          (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (ss_plus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                          (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "@
   phaddsw\t{%2, %0|%0, %2}
   vphaddsw\t{%2, %1, %0|%0, %1, %2}"
  ;; Comma-separated attribute values are per alternative.
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
8544
 
8545
;; 64-bit horizontal add with signed saturation (ss_plus) of adjacent
;; 16-bit elements (V4HI, "y" register constraint).  Operand 1 is tied
;; to the destination; result elements 0-1 are its pair sums and
;; elements 2-3 those of operand 2.
(define_insn "ssse3_phaddswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; Saturated pair sums taken from operand 1.
          (vec_concat:V2HI
            (ss_plus:HI (vec_select:HI
                          (match_operand:V4HI 1 "register_operand" "0")
                          (parallel [(const_int 0)]))
                        (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (ss_plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                        (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Saturated pair sums taken from operand 2.
          (vec_concat:V2HI
            (ss_plus:HI (vec_select:HI
                          (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                          (parallel [(const_int 0)]))
                        (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (ss_plus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                        (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phaddsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   ;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
8573
 
8574
;; 256-bit horizontal subtract of adjacent 16-bit elements (vphsubw on
;; V16HI).  Each result element is an even-indexed source element minus
;; the odd-indexed element that follows it.
;; The 256-bit form operates on each 128-bit lane independently, so each
;; half of the result holds the four pair differences taken from that
;; lane of operand 1 followed by the four pair differences from the same
;; lane of operand 2.  The vec_concat tree below is grouped per lane to
;; describe exactly that element order.
(define_insn "avx2_phsubwv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_concat:V16HI
          ;; Low lane: elements 0-7 of operand 1, then of operand 2.
          (vec_concat:V8HI
            (vec_concat:V4HI
              (vec_concat:V2HI
                (minus:HI
                  (vec_select:HI
                    (match_operand:V16HI 1 "register_operand" "x")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
                (minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
                (minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
            (vec_concat:V4HI
              (vec_concat:V2HI
                (minus:HI
                  (vec_select:HI
                    (match_operand:V16HI 2 "nonimmediate_operand" "xm")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
                (minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
                (minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))))
          ;; High lane: elements 8-15 of operand 1, then of operand 2.
          (vec_concat:V8HI
            (vec_concat:V4HI
              (vec_concat:V2HI
                (minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
                (minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
                (minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))
            (vec_concat:V4HI
              (vec_concat:V2HI
                (minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
                (minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
                (minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
  "TARGET_AVX2"
  "vphsubw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
8649
 
8650
;; 128-bit horizontal subtract of adjacent 16-bit elements (V8HI).
;; Each result element is an even-indexed source element minus the
;; odd-indexed element that follows it; elements 0-3 come from operand 1
;; and elements 4-7 from operand 2.  Alternative 0 ties operand 1 to the
;; destination and emits two-operand phsubw (isa "noavx"); alternative 1
;; emits three-operand vphsubw (isa "avx").
(define_insn "ssse3_phsubwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_concat:V8HI
          ;; Pair differences taken from operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (minus:HI (vec_select:HI
                          (match_operand:V8HI 1 "register_operand" "0,x")
                          (parallel [(const_int 0)]))
                        (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                        (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                        (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                        (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Pair differences taken from operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (minus:HI (vec_select:HI
                          (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
                          (parallel [(const_int 0)]))
                        (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (minus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                        (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (minus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                        (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (minus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                        (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "@
   phsubw\t{%2, %0|%0, %2}
   vphsubw\t{%2, %1, %0|%0, %1, %2}"
  ;; Comma-separated attribute values are per alternative.
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
8698
 
8699
;; 64-bit horizontal subtract of adjacent 16-bit elements (V4HI, "y"
;; register constraint).  Each result element is an even-indexed source
;; element minus the odd-indexed element that follows it.  Operand 1 is
;; tied to the destination; result elements 0-1 come from it and
;; elements 2-3 from operand 2.
(define_insn "ssse3_phsubwv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; Pair differences taken from operand 1.
          (vec_concat:V2HI
            (minus:HI (vec_select:HI
                        (match_operand:V4HI 1 "register_operand" "0")
                        (parallel [(const_int 0)]))
                      (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                      (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Pair differences taken from operand 2.
          (vec_concat:V2HI
            (minus:HI (vec_select:HI
                        (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                        (parallel [(const_int 0)]))
                      (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (minus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                      (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phsubw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   ;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
8727
 
8728
;; 256-bit horizontal subtract of adjacent 32-bit elements (vphsubd on
;; V8SI).  Each result element is an even-indexed source element minus
;; the odd-indexed element that follows it.
;; The 256-bit form operates on each 128-bit lane independently, so each
;; half of the result holds the two pair differences taken from that
;; lane of operand 1 followed by the two pair differences from the same
;; lane of operand 2.  The vec_concat tree below is grouped per lane to
;; describe exactly that element order.
(define_insn "avx2_phsubdv8si3"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (vec_concat:V8SI
          ;; Low lane: elements 0-3 of operand 1, then of operand 2.
          (vec_concat:V4SI
            (vec_concat:V2SI
              (minus:SI
                (vec_select:SI
                  (match_operand:V8SI 1 "register_operand" "x")
                  (parallel [(const_int 0)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
              (minus:SI
                (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2SI
              (minus:SI
                (vec_select:SI
                  (match_operand:V8SI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
              (minus:SI
                (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 3)])))))
          ;; High lane: elements 4-7 of operand 1, then of operand 2.
          (vec_concat:V4SI
            (vec_concat:V2SI
              (minus:SI
                (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
              (minus:SI
                (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 7)]))))
            (vec_concat:V2SI
              (minus:SI
                (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
              (minus:SI
                (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_AVX2"
  "vphsubd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
8771
 
8772
;; 128-bit horizontal subtract of adjacent 32-bit elements (V4SI).
;; Each result element is an even-indexed source element minus the
;; odd-indexed element that follows it; elements 0-1 come from operand 1
;; and elements 2-3 from operand 2.  Alternative 0 ties operand 1 to the
;; destination and emits two-operand phsubd (isa "noavx"); alternative 1
;; emits three-operand vphsubd (isa "avx").
(define_insn "ssse3_phsubdv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
        (vec_concat:V4SI
          ;; Pair differences taken from operand 1.
          (vec_concat:V2SI
            (minus:SI (vec_select:SI
                        (match_operand:V4SI 1 "register_operand" "0,x")
                        (parallel [(const_int 0)]))
                      (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
            (minus:SI (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
                      (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Pair differences taken from operand 2.
          (vec_concat:V2SI
            (minus:SI (vec_select:SI
                        (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
                        (parallel [(const_int 0)]))
                      (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
            (minus:SI (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
                      (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "@
   phsubd\t{%2, %0|%0, %2}
   vphsubd\t{%2, %1, %0|%0, %1, %2}"
  ;; Comma-separated attribute values are per alternative.
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
8805
 
8806
;; 64-bit horizontal subtract of 32-bit elements (V2SI, "y" register
;; constraint).  Operand 1 is tied to the destination; result element 0
;; is its element 0 minus its element 1, and result element 1 is the
;; same difference computed on operand 2.
(define_insn "ssse3_phsubdv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_concat:V2SI
          ;; Element 0 minus element 1 of operand 1.
          (minus:SI (vec_select:SI
                      (match_operand:V2SI 1 "register_operand" "0")
                      (parallel [(const_int 0)]))
                    (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
          ;; Element 0 minus element 1 of operand 2.
          (minus:SI (vec_select:SI
                      (match_operand:V2SI 2 "nonimmediate_operand" "ym")
                      (parallel [(const_int 0)]))
                    (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSSE3"
  "phsubd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   ;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
8826
 
8827
;; vphsubsw, 256-bit form.  Every result element is the signed-saturating
;; difference (ss_minus) of an even-indexed HImode element and the
;; odd-indexed element that follows it.  As written in this RTL, the first
;; eight results are built from operand 1 and the last eight from operand 2.
(define_insn "avx2_phsubswv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_concat:V16HI
          ;; Pairwise differences taken from operand 1.
          (vec_concat:V8HI
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI
                    (match_operand:V16HI 1 "register_operand" "x")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
          ;; Pairwise differences taken from operand 2.
          (vec_concat:V8HI
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI
                    (match_operand:V16HI 2 "nonimmediate_operand" "xm")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
  "TARGET_AVX2"
  "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
8902
 
8903
;; phsubsw / vphsubsw, 128-bit form.  Each result element is the
;; signed-saturating difference of an even-indexed HImode element and the
;; following odd-indexed one; the first four results come from operand 1,
;; the last four from operand 2.
;; Alternative 0 ties operand 1 to the destination (two-operand legacy
;; encoding); alternative 1 is the three-operand VEX encoding.
(define_insn "ssse3_phsubswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_concat:V8HI
          ;; Pairwise differences taken from operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0,x")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Pairwise differences taken from operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "@
   phsubsw\t{%2, %0|%0, %2}
   vphsubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
8951
 
8952
;; phsubsw, 64-bit form (constraint "y" operands).  Each result element is
;; the signed-saturating difference of an even-indexed HImode element and
;; the following odd-indexed one; the first two results come from
;; operand 1 (tied to the destination), the last two from operand 2.
(define_insn "ssse3_phsubswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; Pairwise differences taken from operand 1.
          (vec_concat:V2HI
            (ss_minus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (ss_minus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Pairwise differences taken from operand 2.
          (vec_concat:V2HI
            (ss_minus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (ss_minus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phsubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   ;; A REX prefix is needed only when an extended register is mentioned.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
8980
 
8981
;; vpmaddubsw, 256-bit form.  The bytes of operand 1 are zero-extended and
;; the bytes of operand 2 sign-extended to HImode; the products of the
;; even-indexed bytes and of the odd-indexed bytes are then added with
;; signed saturation (ss_plus), giving sixteen HImode results.
(define_insn "avx2_pmaddubsw256"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (ss_plus:V16HI
          ;; Products of the even-indexed bytes.
          (mult:V16HI
            (zero_extend:V16HI
              (vec_select:V16QI
                (match_operand:V32QI 1 "register_operand" "x")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)
                           (const_int 16) (const_int 18)
                           (const_int 20) (const_int 22)
                           (const_int 24) (const_int 26)
                           (const_int 28) (const_int 30)])))
            (sign_extend:V16HI
              (vec_select:V16QI
                (match_operand:V32QI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)
                           (const_int 16) (const_int 18)
                           (const_int 20) (const_int 22)
                           (const_int 24) (const_int 26)
                           (const_int 28) (const_int 30)]))))
          ;; Products of the odd-indexed bytes.
          (mult:V16HI
            (zero_extend:V16HI
              (vec_select:V16QI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)
                           (const_int 17) (const_int 19)
                           (const_int 21) (const_int 23)
                           (const_int 25) (const_int 27)
                           (const_int 29) (const_int 31)])))
            (sign_extend:V16HI
              (vec_select:V16QI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)
                           (const_int 17) (const_int 19)
                           (const_int 21) (const_int 23)
                           (const_int 25) (const_int 27)
                           (const_int 29) (const_int 31)]))))))]
  "TARGET_AVX2"
  "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
9066
 
9067
;; pmaddubsw / vpmaddubsw, 128-bit form.  The bytes of operand 1 are
;; zero-extended and the bytes of operand 2 sign-extended to HImode; the
;; even-byte products and odd-byte products are added with signed
;; saturation, giving eight HImode results.
;; Alternative 0 is the two-operand legacy encoding (operand 1 tied to the
;; destination); alternative 1 is the three-operand VEX encoding.
(define_insn "ssse3_pmaddubsw128"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (ss_plus:V8HI
          ;; Products of the even-indexed bytes.
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 1 "register_operand" "0,x")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)])))
            (sign_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)]))))
          ;; Products of the odd-indexed bytes.
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)])))
            (sign_extend:V8HI
              (vec_select:V8QI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)]))))))]
  "TARGET_SSSE3"
  "@
   pmaddubsw\t{%2, %0|%0, %2}
   vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
9125
 
9126
;; pmaddubsw, 64-bit form (constraint "y" operands).  The bytes of
;; operand 1 are zero-extended and the bytes of operand 2 sign-extended to
;; HImode; the even-byte products and odd-byte products are added with
;; signed saturation, giving four HImode results.
(define_insn "ssse3_pmaddubsw"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (ss_plus:V4HI
          ;; Products of the even-indexed bytes.
          (mult:V4HI
            (zero_extend:V4HI
              (vec_select:V4QI
                (match_operand:V8QI 1 "register_operand" "0")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)])))
            (sign_extend:V4HI
              (vec_select:V4QI
                (match_operand:V8QI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)]))))
          ;; Products of the odd-indexed bytes.
          (mult:V4HI
            (zero_extend:V4HI
              (vec_select:V4QI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4HI
              (vec_select:V4QI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))))]
  "TARGET_SSSE3"
  "pmaddubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_extra" "1")
   ;; A REX prefix is needed only when an extended register is mentioned.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
9164
 
9165
;; Expander for the 256-bit rounding high multiply.  Per element it
;; computes truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1).
;; The preparation statement canonicalizes the two commutative inputs
;; with ix86_fixup_binary_operands_no_copy.
;; NOTE(review): despite the "u" in the name, both inputs are
;; sign-extended.  The name is kept because other code may refer to it.
;; NOTE(review): the rounding constant is written as const_vector:V16HI
;; inside a plus:V16SI.  It is left as is so that this expander and the
;; insn that matches its output stay in sync; confirm before changing.
(define_expand "avx2_umulhrswv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "")
        (truncate:V16HI
          (lshiftrt:V16SI
            (plus:V16SI
              (lshiftrt:V16SI
                (mult:V16SI
                  (sign_extend:V16SI
                    (match_operand:V16HI 1 "nonimmediate_operand" ""))
                  (sign_extend:V16SI
                    (match_operand:V16HI 2 "nonimmediate_operand" "")))
                (const_int 14))
              (const_vector:V16HI [(const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_AVX2"
  "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
9188
 
9189
;; vpmulhrsw, 256-bit form.  Per element:
;; truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1).
;; The "%" on operand 1 marks operands 1 and 2 as commutative; the insn
;; condition additionally requires ix86_binary_operator_ok.
;; NOTE(review): the rounding constant is const_vector:V16HI inside a
;; plus:V16SI.  It is left as is so this insn keeps matching the RTL
;; produced by its expander; confirm before changing.
(define_insn "*avx2_umulhrswv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (truncate:V16HI
          (lshiftrt:V16SI
            (plus:V16SI
              (lshiftrt:V16SI
                (mult:V16SI
                  (sign_extend:V16SI
                    (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
                  (sign_extend:V16SI
                    (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
                (const_int 14))
              (const_vector:V16HI [(const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
  "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
9216
 
9217
;; Expander for the 128-bit rounding high multiply.  Per element it
;; computes truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1).
;; The preparation statement canonicalizes the two commutative inputs
;; with ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the rounding constant is const_vector:V8HI inside a
;; plus:V8SI.  It is left as is so that this expander and the insn that
;; matches its output stay in sync; confirm before changing.
(define_expand "ssse3_pmulhrswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (lshiftrt:V8SI
                (mult:V8SI
                  (sign_extend:V8SI
                    (match_operand:V8HI 1 "nonimmediate_operand" ""))
                  (sign_extend:V8SI
                    (match_operand:V8HI 2 "nonimmediate_operand" "")))
                (const_int 14))
              (const_vector:V8HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3"
  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
9236
 
9237
;; pmulhrsw / vpmulhrsw, 128-bit form.  Per element:
;; truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1).
;; Operands 1 and 2 are commutative ("%").  Alternative 0 is the
;; two-operand legacy encoding, alternative 1 the three-operand VEX one.
;; NOTE(review): the rounding constant is const_vector:V8HI inside a
;; plus:V8SI.  It is left as is so this insn keeps matching the RTL
;; produced by its expander; confirm before changing.
(define_insn "*ssse3_pmulhrswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (lshiftrt:V8SI
                (mult:V8SI
                  (sign_extend:V8SI
                    (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
                  (sign_extend:V8SI
                    (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
                (const_int 14))
              (const_vector:V8HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "@
   pmulhrsw\t{%2, %0|%0, %2}
   vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
9264
 
9265
;; Expander for the 64-bit rounding high multiply.  Per element it
;; computes truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1).
;; The preparation statement canonicalizes the two commutative inputs
;; with ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the rounding constant is const_vector:V4HI inside a
;; plus:V4SI.  It is left as is so that this expander and the insn that
;; matches its output stay in sync; confirm before changing.
(define_expand "ssse3_pmulhrswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "")
        (truncate:V4HI
          (lshiftrt:V4SI
            (plus:V4SI
              (lshiftrt:V4SI
                (mult:V4SI
                  (sign_extend:V4SI
                    (match_operand:V4HI 1 "nonimmediate_operand" ""))
                  (sign_extend:V4SI
                    (match_operand:V4HI 2 "nonimmediate_operand" "")))
                (const_int 14))
              (const_vector:V4HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3"
  "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
9282
 
9283
;; pmulhrsw, 64-bit form (constraint "y" operands).  Per element:
;; truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1).
;; Operands 1 and 2 are commutative ("%"); operand 1 is tied to the
;; destination.
;; NOTE(review): the rounding constant is const_vector:V4HI inside a
;; plus:V4SI.  It is left as is so this insn keeps matching the RTL
;; produced by its expander; confirm before changing.
(define_insn "*ssse3_pmulhrswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (truncate:V4HI
          (lshiftrt:V4SI
            (plus:V4SI
              (lshiftrt:V4SI
                (mult:V4SI
                  (sign_extend:V4SI
                    (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
                  (sign_extend:V4SI
                    (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
                (const_int 14))
              (const_vector:V4HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
  "pmulhrsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   ;; A REX prefix is needed only when an extended register is mentioned.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
9304
 
9305
;; pshufb / vpshufb over the VI1_AVX2 byte-vector modes.  The shuffle is
;; opaque to the RTL optimizers: it is represented as UNSPEC_PSHUFB of
;; the data operand (1) and the control operand (2).
;; Alternative 0 is the two-operand legacy encoding, alternative 1 the
;; three-operand VEX encoding.
;; NOTE(review): the <ssse3_avx2>, <mode> and <sseinsnmode> attributes
;; were reconstructed after being lost from this copy of the file;
;; confirm against the upstream sse.md.
(define_insn "<ssse3_avx2>_pshufb<mode>3"
  [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
        (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
                          (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
                         UNSPEC_PSHUFB))]
  "TARGET_SSSE3"
  "@
   pshufb\t{%2, %0|%0, %2}
   vpshufb\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
9320
 
9321
;; pshufb, 64-bit form (constraint "y" operands), represented as
;; UNSPEC_PSHUFB of the data operand (1, tied to the destination) and the
;; control operand (2).
(define_insn "ssse3_pshufbv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
        (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
                      (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
                     UNSPEC_PSHUFB))]
  "TARGET_SSSE3"
  "pshufb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   ;; A REX prefix is needed only when an extended register is mentioned.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
9332
 
9333
;; psign{b,w,d} / vpsign{b,w,d} over the VI124_AVX2 integer vector modes,
;; represented as UNSPEC_PSIGN of operands 1 and 2.
;; Alternative 0 is the two-operand legacy encoding, alternative 1 the
;; three-operand VEX encoding.
;; NOTE(review): the <ssse3_avx2>, <mode>, <ssemodesuffix> and
;; <sseinsnmode> attributes were reconstructed after being lost from this
;; copy of the file; confirm against the upstream sse.md.
(define_insn "<ssse3_avx2>_psign<mode>3"
  [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
        (unspec:VI124_AVX2
          [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
           (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
          UNSPEC_PSIGN))]
  "TARGET_SSSE3"
  "@
   psign<ssemodesuffix>\t{%2, %0|%0, %2}
   vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
9349
 
9350
;; psign{b,w,d}, 64-bit form over the MMXMODEI modes (constraint "y"
;; operands), represented as UNSPEC_PSIGN of operands 1 and 2.
;; NOTE(review): the <mode> and <mmxvecsize> attributes were reconstructed
;; after being lost from this copy of the file; confirm against the
;; upstream sse.md.
(define_insn "ssse3_psign<mode>3"
  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
        (unspec:MMXMODEI
          [(match_operand:MMXMODEI 1 "register_operand" "0")
           (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
          UNSPEC_PSIGN))]
  "TARGET_SSSE3"
  "psign<mmxvecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   ;; A REX prefix is needed only when an extended register is mentioned.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
9362
 
9363
;; palignr / vpalignr over the SSESCALARMODE modes, represented as
;; UNSPEC_PALIGNR.  Operand 3 must satisfy const_0_to_255_mul_8_operand;
;; the output code divides it by 8 before printing, so the RTL carries
;; eight times the value that appears in the assembly.
;; NOTE(review): the <ssse3_avx2>, <mode> and <sseinsnmode> attributes
;; were reconstructed after being lost from this copy of the file;
;; confirm against the upstream sse.md.
(define_insn "<ssse3_avx2>_palignr<mode>"
  [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
        (unspec:SSESCALARMODE [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
                               (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
                               (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
                              UNSPEC_PALIGNR))]
  "TARGET_SSSE3"
{
  /* Rescale the RTL operand to the value the instruction encodes.  */
  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);

  switch (which_alternative)
    {
    case 0:
      /* Two-operand legacy encoding.  */
      return "palignr\t{%3, %2, %0|%0, %2, %3}";
    case 1:
      /* Three-operand VEX encoding.  */
      return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseishft")
   (set_attr "atom_unit" "sishuf")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
9391
 
9392
;; palignr, 64-bit DImode form (constraint "y" operands), represented as
;; UNSPEC_PALIGNR.  Operand 3 must satisfy const_0_to_255_mul_8_operand;
;; the output code divides it by 8 before printing.
(define_insn "ssse3_palignrdi"
  [(set (match_operand:DI 0 "register_operand" "=y")
        (unspec:DI [(match_operand:DI 1 "register_operand" "0")
                    (match_operand:DI 2 "nonimmediate_operand" "ym")
                    (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
                   UNSPEC_PALIGNR))]
  "TARGET_SSSE3"
{
  /* Rescale the RTL operand to the value the instruction encodes.  */
  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
  return "palignr\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sseishft")
   (set_attr "atom_unit" "sishuf")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   ;; A REX prefix is needed only when an extended register is mentioned.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
9409
 
9410
;; Element-wise absolute value (pabs{b,w,d} / vpabs{b,w,d}) over the
;; VI124_AVX2 integer vector modes.  The "%v" in the template selects the
;; VEX form when applicable (prefix is "maybe_vex").
;; NOTE(review): the <mode>, <ssemodesuffix> and <sseinsnmode> attributes
;; were reconstructed after being lost from this copy of the file;
;; confirm against the upstream sse.md.
(define_insn "abs<mode>2"
  [(set (match_operand:VI124_AVX2 0 "register_operand" "=x")
        (abs:VI124_AVX2
          (match_operand:VI124_AVX2 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSSE3"
  "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])
9421
 
9422
;; Element-wise absolute value (pabs{b,w,d}), 64-bit form over the
;; MMXMODEI modes (constraint "y" operands).
;; NOTE(review): the <mode> and <mmxvecsize> attributes were reconstructed
;; after being lost from this copy of the file; confirm against the
;; upstream sse.md.
(define_insn "abs<mode>2"
  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
        (abs:MMXMODEI
          (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
  "TARGET_SSSE3"
  "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_rep" "0")
   (set_attr "prefix_extra" "1")
   ;; A REX prefix is needed only when an extended register is mentioned.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
9433
 
9434
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9435
;;
9436
;; AMD SSE4A instructions
9437
;;
9438
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9439
 
9440
;; SSE4A movntss / movntsd: store of a scalar MODEF value from an SSE
;; register to memory, represented as UNSPEC_MOVNT.
;; NOTE(review): the <mode>, <ssemodesuffix> and <MODE> attributes were
;; reconstructed after being lost from this copy of the file; confirm
;; against the upstream sse.md.
(define_insn "sse4a_movnt<mode>"
  [(set (match_operand:MODEF 0 "memory_operand" "=m")
        (unspec:MODEF
          [(match_operand:MODEF 1 "register_operand" "x")]
          UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "<MODE>")])
9449
 
9450
;; SSE4A movntss / movntsd taking a VF_128 vector source: element 0 of
;; operand 1 is selected and stored to memory, represented as
;; UNSPEC_MOVNT.
;; NOTE(review): the <mode>, <ssescalarmode> and <ssescalarmodesuffix>
;; attributes were reconstructed after being lost from this copy of the
;; file (the operand modes were empty); confirm against the upstream
;; sse.md.
(define_insn "sse4a_vmmovnt<mode>"
  [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
        (unspec:<ssescalarmode>
          [(vec_select:<ssescalarmode>
             (match_operand:VF_128 1 "register_operand" "x")
             (parallel [(const_int 0)]))]
          UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "<ssescalarmode>")])
9461
 
9462
;; SSE4A extrq, immediate form, represented as UNSPEC_EXTRQI.  Operand 1
;; is tied to the destination; operands 2 and 3 are the two immediates
;; (each must satisfy const_0_to_255_operand), hence length_immediate 2.
(define_insn "sse4a_extrqi"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand 2 "const_0_to_255_operand" "")
                      (match_operand 3 "const_0_to_255_operand" "")]
                     UNSPEC_EXTRQI))]
  "TARGET_SSE4A"
  "extrq\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sse")
   (set_attr "prefix_data16" "1")
   (set_attr "length_immediate" "2")
   (set_attr "mode" "TI")])
9474
 
9475
;; SSE4A extrq, register form, represented as UNSPEC_EXTRQ.  Operand 1 is
;; tied to the destination; operand 2 is a V16QI register.
(define_insn "sse4a_extrq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V16QI 2 "register_operand" "x")]
                     UNSPEC_EXTRQ))]
  "TARGET_SSE4A"
  "extrq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
9485
 
9486
;; SSE4A insertq, immediate form, represented as UNSPEC_INSERTQI.
;; Operand 1 is tied to the destination, operand 2 is the second register
;; source, and operands 3 and 4 are the two immediates (each must satisfy
;; const_0_to_255_operand), hence length_immediate 2.
(define_insn "sse4a_insertqi"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V2DI 2 "register_operand" "x")
                      (match_operand 3 "const_0_to_255_operand" "")
                      (match_operand 4 "const_0_to_255_operand" "")]
                     UNSPEC_INSERTQI))]
  "TARGET_SSE4A"
  "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
  [(set_attr "type" "sseins")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "1")
   (set_attr "length_immediate" "2")
   (set_attr "mode" "TI")])
9500
 
9501
;; SSE4A insertq, register form, represented as UNSPEC_INSERTQ.
;; Operand 1 is tied to the destination; operand 2 is the second register
;; source.
(define_insn "sse4a_insertq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V2DI 2 "register_operand" "x")]
                     UNSPEC_INSERTQ))]
  "TARGET_SSE4A"
  "insertq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseins")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "TI")])
9512
 
9513
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9514
;;
9515
;; Intel SSE4.1 instructions
9516
;;
9517
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9518
 
9519
;; blendps/blendpd and their VEX forms over the VF float vector modes.
;; Expressed as a vec_merge of operand 2 and operand 1 under the
;; immediate mask in operand 3.
;; Alternative 0 is the two-operand legacy encoding, alternative 1 the
;; three-operand VEX encoding.
;; NOTE(review): the <sse4_1>, <ssemodesuffix>, <avxsizesuffix>,
;; <blendbits> and <MODE> attributes were reconstructed after being lost
;; from this copy of the file; confirm against the upstream sse.md.
(define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (vec_merge:VF
          (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
          (match_operand:VF 1 "register_operand" "0,x")
          (match_operand:SI 3 "const_0_to_<blendbits>_operand" "")))]
  "TARGET_SSE4_1"
  "@
   blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])
9536
 
9537
;; blendvps/blendvpd and their VEX forms over the VF float vector modes,
;; represented as UNSPEC_BLENDV of operands 1, 2 and the selector in
;; operand 3.  In alternative 0 the selector uses constraint "Yz" and
;; the other operands use the *_not_xmm0_* predicates; alternative 1
;; (VEX) accepts any "x" register for the selector.
;; NOTE(review): the <sse4_1>, <ssemodesuffix>, <avxsizesuffix> and
;; <MODE> attributes were reconstructed after being lost from this copy
;; of the file; confirm against the upstream sse.md.
(define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
        (unspec:VF
          [(match_operand:VF 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
           (match_operand:VF 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
           (match_operand:VF 3 "register_operand" "Yz,x")]
          UNSPEC_BLENDV))]
  "TARGET_SSE4_1"
  "@
   blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])
9555
 
9556
;; dpps/dppd and their VEX forms over the VF float vector modes,
;; represented as UNSPEC_DP of operands 1 and 2 plus the 8-bit immediate
;; in operand 3.  Operands 1 and 2 are commutative ("%").
;; Alternative 0 is the two-operand legacy encoding, alternative 1 the
;; three-operand VEX encoding.
;; NOTE(review): the <sse4_1>, <ssemodesuffix>, <avxsizesuffix> and
;; <MODE> attributes were reconstructed after being lost from this copy
;; of the file; confirm against the upstream sse.md.
(define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (unspec:VF
          [(match_operand:VF 1 "nonimmediate_operand" "%0,x")
           (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
          UNSPEC_DP))]
  "TARGET_SSE4_1"
  "@
   dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemul")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])
9574
 
9575
;; movntdqa load: operand 1 must be a memory operand and the result goes to
;; an SSE register.  Represented as UNSPEC_MOVNTDQA rather than a plain move.
;; A single "%v" template with prefix "maybe_vex" covers both the legacy and
;; the VEX spelling.
;; NOTE(review): the pattern name and "mode" attribute look truncated in this
;; listing -- confirm against upstream.
(define_insn "_movntdqa"
9576
  [(set (match_operand:VI8_AVX2 0 "register_operand" "=x")
9577
        (unspec:VI8_AVX2 [(match_operand:VI8_AVX2 1 "memory_operand" "m")]
9578
                     UNSPEC_MOVNTDQA))]
9579
  "TARGET_SSE4_1"
9580
  "%vmovntdqa\t{%1, %0|%0, %1}"
9581
  [(set_attr "type" "ssemov")
9582
   (set_attr "prefix_extra" "1")
9583
   (set_attr "prefix" "maybe_vex")
9584
   (set_attr "mode" "")])
9585
 
9586
;; mpsadbw, modelled as UNSPEC_MPSADBW over two byte vectors and an 8-bit
;; immediate (operand 3, range 0..255).  Alternative 0 is the non-AVX form
;; with operand 1 tied to the destination; alternative 1 is the VEX
;; three-operand form.
;; NOTE(review): the pattern name and "mode" attribute look truncated in this
;; listing -- confirm against upstream.
(define_insn "_mpsadbw"
9587
  [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9588
        (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9589
                          (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
9590
                          (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9591
                         UNSPEC_MPSADBW))]
9592
  "TARGET_SSE4_1"
9593
  "@
9594
   mpsadbw\t{%3, %2, %0|%0, %2, %3}
9595
   vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9596
  [(set_attr "isa" "noavx,avx")
9597
   (set_attr "type" "sselog1")
9598
   (set_attr "length_immediate" "1")
9599
   (set_attr "prefix_extra" "1")
9600
   (set_attr "prefix" "orig,vex")
9601
   (set_attr "mode" "")])
9602
 
9603
;; 256-bit vpackusdw: each V8SI input is narrowed to V8HI with unsigned
;; saturation (us_truncate) and the two halves are joined by vec_concat into
;; a V16HI result.  AVX2 only, VEX three-operand form.
;; NOTE(review): the 256-bit instruction is understood to pack within each
;; 128-bit lane, whereas this RTL describes a straight concatenation of the
;; two narrowed inputs -- confirm nothing relies on the RTL element order.
(define_insn "avx2_packusdw"
9604
  [(set (match_operand:V16HI 0 "register_operand" "=x")
9605
        (vec_concat:V16HI
9606
          (us_truncate:V8HI
9607
            (match_operand:V8SI 1 "register_operand" "x"))
9608
          (us_truncate:V8HI
9609
            (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
9610
  "TARGET_AVX2"
9611
  "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9612
  [(set_attr "type" "sselog")
9613
   (set_attr "prefix_extra" "1")
9614
   (set_attr "prefix" "vex")
9615
   (set_attr "mode" "OI")])
9616
 
9617
;; 128-bit packusdw: each V4SI input is narrowed to V4HI with unsigned
;; saturation (us_truncate); operand 1 supplies the low half and operand 2
;; the high half of the V8HI result.  Alternative 0 is the non-AVX form with
;; operand 1 tied to the destination; alternative 1 is the VEX form.
(define_insn "sse4_1_packusdw"
9618
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9619
        (vec_concat:V8HI
9620
          (us_truncate:V4HI
9621
            (match_operand:V4SI 1 "register_operand" "0,x"))
9622
          (us_truncate:V4HI
9623
            (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
9624
  "TARGET_SSE4_1"
9625
  "@
9626
   packusdw\t{%2, %0|%0, %2}
9627
   vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9628
  [(set_attr "isa" "noavx,avx")
9629
   (set_attr "type" "sselog")
9630
   (set_attr "prefix_extra" "1")
9631
   (set_attr "prefix" "orig,vex")
9632
   (set_attr "mode" "TI")])
9633
 
9634
;; Variable byte blend; operand 3 is the selector.  Alternative 0 (non-AVX)
;; requires the selector in the "Yz" register (first SSE register, xmm0),
;; which the instruction uses implicitly, and ties operand 1 to the
;; destination.  Alternative 1 (AVX) names the selector explicitly, which is
;; why only that alternative has an immediate byte (length_immediate "*,1").
;; Operand 0 uses the same reg_not_xmm0_operand_maybe_avx predicate as
;; operand 1 and as the floating-point blendv pattern, so the xmm0 exclusion
;; on the destination follows the same AVX/non-AVX rule as the other operands.
;; NOTE(review): the pattern name and "mode" attribute look truncated in this
;; listing -- confirm against upstream.
(define_insn "_pblendvb"
9635
  [(set (match_operand:VI1_AVX2 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
9636
        (unspec:VI1_AVX2
9637
          [(match_operand:VI1_AVX2 1 "reg_not_xmm0_operand_maybe_avx"  "0,x")
9638
           (match_operand:VI1_AVX2 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9639
           (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
9640
          UNSPEC_BLENDV))]
9641
  "TARGET_SSE4_1"
9642
  "@
9643
   pblendvb\t{%3, %2, %0|%0, %2, %3}
9644
   vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9645
  [(set_attr "isa" "noavx,avx")
9646
   (set_attr "type" "ssemov")
9647
   (set_attr "prefix_extra" "1")
9648
   (set_attr "length_immediate" "*,1")
9649
   (set_attr "prefix" "orig,vex")
9650
   (set_attr "mode" "")])
9651
 
9652
;; 128-bit pblendw: vec_merge of two V8HI vectors under an 8-bit constant
;; mask (operand 3, range 0..255), one mask bit per 16-bit element.
;; Alternative 0 is the non-AVX form with operand 1 tied to the destination;
;; alternative 1 is the VEX three-operand form.
(define_insn "sse4_1_pblendw"
9653
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9654
        (vec_merge:V8HI
9655
          (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
9656
          (match_operand:V8HI 1 "register_operand" "0,x")
9657
          (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
9658
  "TARGET_SSE4_1"
9659
  "@
9660
   pblendw\t{%3, %2, %0|%0, %2, %3}
9661
   vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9662
  [(set_attr "isa" "noavx,avx")
9663
   (set_attr "type" "ssemov")
9664
   (set_attr "prefix_extra" "1")
9665
   (set_attr "length_immediate" "1")
9666
   (set_attr "prefix" "orig,vex")
9667
   (set_attr "mode" "TI")])
9668
 
9669
;; The builtin supplies an 8-bit immediate, but the V16HI vec_merge below
;; needs one mask bit per element.  The preparation code masks operand 3 to
;; its low 8 bits and replicates them into bits 8..15 to form a 16-bit mask.
9670
(define_expand "avx2_pblendw"
9671
  [(set (match_operand:V16HI 0 "register_operand" "")
9672
        (vec_merge:V16HI
9673
          (match_operand:V16HI 2 "nonimmediate_operand" "")
9674
          (match_operand:V16HI 1 "register_operand" "")
9675
          (match_operand:SI 3 "const_0_to_255_operand" "")))]
9676
  "TARGET_AVX2"
9677
{
9678
  HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
9679
  operands[3] = GEN_INT (val << 8 | val);
9680
})
9681
 
9682
;; Insn matched by the expanded form of avx2_pblendw: operand 3 is the
;; widened merge mask, checked by the avx2_pblendw_operand predicate.  The
;; output code reduces operand 3 back to its low 8 bits before printing,
;; since the instruction itself takes an 8-bit immediate.
(define_insn "*avx2_pblendw"
9683
  [(set (match_operand:V16HI 0 "register_operand" "=x")
9684
        (vec_merge:V16HI
9685
          (match_operand:V16HI 2 "nonimmediate_operand" "xm")
9686
          (match_operand:V16HI 1 "register_operand" "x")
9687
          (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
9688
  "TARGET_AVX2"
9689
{
9690
  operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
9691
  return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9692
}
9693
  [(set_attr "type" "ssemov")
9694
   (set_attr "prefix_extra" "1")
9695
   (set_attr "length_immediate" "1")
9696
   (set_attr "prefix" "vex")
9697
   (set_attr "mode" "OI")])
9698
 
9699
;; vpblendd: vec_merge of two vectors from the VI4_AVX2 mode iterator under
;; an 8-bit constant mask (operand 3, range 0..255).  AVX2 only, VEX
;; three-operand form.
;; NOTE(review): the "mode" attribute (and possibly the name) look truncated
;; in this listing -- confirm against upstream.
(define_insn "avx2_pblendd"
9700
  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
9701
        (vec_merge:VI4_AVX2
9702
          (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
9703
          (match_operand:VI4_AVX2 1 "register_operand" "x")
9704
          (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9705
  "TARGET_AVX2"
9706
  "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9707
  [(set_attr "type" "ssemov")
9708
   (set_attr "prefix_extra" "1")
9709
   (set_attr "length_immediate" "1")
9710
   (set_attr "prefix" "vex")
9711
   (set_attr "mode" "")])
9712
 
9713
;; phminposuw on a V8HI input, modelled as UNSPEC_PHMINPOSUW.  One "%v"
;; template with prefix "maybe_vex" covers the legacy and the VEX spelling.
(define_insn "sse4_1_phminposuw"
9714
  [(set (match_operand:V8HI 0 "register_operand" "=x")
9715
        (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9716
                     UNSPEC_PHMINPOSUW))]
9717
  "TARGET_SSE4_1"
9718
  "%vphminposuw\t{%1, %0|%0, %1}"
9719
  [(set_attr "type" "sselog1")
9720
   (set_attr "prefix_extra" "1")
9721
   (set_attr "prefix" "maybe_vex")
9722
   (set_attr "mode" "TI")])
9723
 
9724
;; AVX2 widening of all 16 byte elements of operand 1 (V16QI) to 16-bit
;; elements (V16HI).  any_extend is a code iterator, so this one template
;; stands for more than one extension kind.
;; NOTE(review): the pattern name and mnemonic appear to have lost their
;; iterator-dependent parts in this listing -- confirm against upstream.
(define_insn "avx2_v16qiv16hi2"
9725
  [(set (match_operand:V16HI 0 "register_operand" "=x")
9726
        (any_extend:V16HI
9727
          (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
9728
  "TARGET_AVX2"
9729
  "vpmovbw\t{%1, %0|%0, %1}"
9730
  [(set_attr "type" "ssemov")
9731
   (set_attr "prefix_extra" "1")
9732
   (set_attr "prefix" "vex")
9733
   (set_attr "mode" "OI")])
9734
 
9735
;; SSE4.1 widening of the low 8 byte elements of operand 1 (vec_select of
;; indices 0..7 from a V16QI) to 16-bit elements (V8HI).  any_extend is a
;; code iterator.  The Intel-syntax side prints operand 1 with the %q
;; modifier.
;; NOTE(review): the pattern name and mnemonic appear to have lost their
;; iterator-dependent parts in this listing -- confirm against upstream.
(define_insn "sse4_1_v8qiv8hi2"
9736
  [(set (match_operand:V8HI 0 "register_operand" "=x")
9737
        (any_extend:V8HI
9738
          (vec_select:V8QI
9739
            (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9740
            (parallel [(const_int 0)
9741
                       (const_int 1)
9742
                       (const_int 2)
9743
                       (const_int 3)
9744
                       (const_int 4)
9745
                       (const_int 5)
9746
                       (const_int 6)
9747
                       (const_int 7)]))))]
9748
  "TARGET_SSE4_1"
9749
  "%vpmovbw\t{%1, %0|%0, %q1}"
9750
  [(set_attr "type" "ssemov")
9751
   (set_attr "prefix_extra" "1")
9752
   (set_attr "prefix" "maybe_vex")
9753
   (set_attr "mode" "TI")])
9754
 
9755
;; AVX2 widening of the low 8 byte elements of operand 1 (vec_select of
;; indices 0..7 from a V16QI) to 32-bit elements (V8SI).  any_extend is a
;; code iterator.  The Intel-syntax side prints operand 1 with the %q
;; modifier.
;; NOTE(review): the pattern name and mnemonic appear to have lost their
;; iterator-dependent parts in this listing -- confirm against upstream.
(define_insn "avx2_v8qiv8si2"
9756
  [(set (match_operand:V8SI 0 "register_operand" "=x")
9757
        (any_extend:V8SI
9758
          (vec_select:V8QI
9759
            (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9760
            (parallel [(const_int 0)
9761
                       (const_int 1)
9762
                       (const_int 2)
9763
                       (const_int 3)
9764
                       (const_int 4)
9765
                       (const_int 5)
9766
                       (const_int 6)
9767
                       (const_int 7)]))))]
9768
  "TARGET_AVX2"
9769
  "vpmovbd\t{%1, %0|%0, %q1}"
9770
  [(set_attr "type" "ssemov")
9771
   (set_attr "prefix_extra" "1")
9772
   (set_attr "prefix" "vex")
9773
   (set_attr "mode" "OI")])
9774
 
9775
;; SSE4.1 widening of the low 4 byte elements of operand 1 (vec_select of
;; indices 0..3 from a V16QI) to 32-bit elements (V4SI).  any_extend is a
;; code iterator.  The Intel-syntax side prints operand 1 with the %k
;; modifier.
;; NOTE(review): the pattern name and mnemonic appear to have lost their
;; iterator-dependent parts in this listing -- confirm against upstream.
(define_insn "sse4_1_v4qiv4si2"
9776
  [(set (match_operand:V4SI 0 "register_operand" "=x")
9777
        (any_extend:V4SI
9778
          (vec_select:V4QI
9779
            (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9780
            (parallel [(const_int 0)
9781
                       (const_int 1)
9782
                       (const_int 2)
9783
                       (const_int 3)]))))]
9784
  "TARGET_SSE4_1"
9785
  "%vpmovbd\t{%1, %0|%0, %k1}"
9786
  [(set_attr "type" "ssemov")
9787
   (set_attr "prefix_extra" "1")
9788
   (set_attr "prefix" "maybe_vex")
9789
   (set_attr "mode" "TI")])
9790
 
9791
;; AVX2 widening of all 8 halfword elements of operand 1 (V8HI) to 32-bit
;; elements (V8SI).  any_extend is a code iterator.
;; NOTE(review): the pattern name and mnemonic appear to have lost their
;; iterator-dependent parts in this listing -- confirm against upstream.
(define_insn "avx2_v8hiv8si2"
9792
  [(set (match_operand:V8SI 0 "register_operand" "=x")
9793
        (any_extend:V8SI
9794
            (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
9795
  "TARGET_AVX2"
9796
  "vpmovwd\t{%1, %0|%0, %1}"
9797
  [(set_attr "type" "ssemov")
9798
   (set_attr "prefix_extra" "1")
9799
   (set_attr "prefix" "vex")
9800
   (set_attr "mode" "OI")])
9801
 
9802
;; SSE4.1 widening of the low 4 halfword elements of operand 1 (vec_select
;; of indices 0..3 from a V8HI) to 32-bit elements (V4SI).  any_extend is a
;; code iterator.  The Intel-syntax side prints operand 1 with the %q
;; modifier.
;; NOTE(review): the pattern name and mnemonic appear to have lost their
;; iterator-dependent parts in this listing -- confirm against upstream.
(define_insn "sse4_1_v4hiv4si2"
9803
  [(set (match_operand:V4SI 0 "register_operand" "=x")
9804
        (any_extend:V4SI
9805
          (vec_select:V4HI
9806
            (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9807
            (parallel [(const_int 0)
9808
                       (const_int 1)
9809
                       (const_int 2)
9810
                       (const_int 3)]))))]
9811
  "TARGET_SSE4_1"
9812
  "%vpmovwd\t{%1, %0|%0, %q1}"
9813
  [(set_attr "type" "ssemov")
9814
   (set_attr "prefix_extra" "1")
9815
   (set_attr "prefix" "maybe_vex")
9816
   (set_attr "mode" "TI")])
9817
 
9818
;; AVX2 widening of the low 4 byte elements of operand 1 (vec_select of
;; indices 0..3 from a V16QI) to 64-bit elements (V4DI).  any_extend is a
;; code iterator.  The Intel-syntax side prints operand 1 with the %k
;; modifier.
;; NOTE(review): the pattern name and mnemonic appear to have lost their
;; iterator-dependent parts in this listing -- confirm against upstream.
(define_insn "avx2_v4qiv4di2"
9819
  [(set (match_operand:V4DI 0 "register_operand" "=x")
9820
        (any_extend:V4DI
9821
          (vec_select:V4QI
9822
            (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9823
            (parallel [(const_int 0)
9824
                       (const_int 1)
9825
                       (const_int 2)
9826
                       (const_int 3)]))))]
9827
  "TARGET_AVX2"
9828
  "vpmovbq\t{%1, %0|%0, %k1}"
9829
  [(set_attr "type" "ssemov")
9830
   (set_attr "prefix_extra" "1")
9831
   (set_attr "prefix" "vex")
9832
   (set_attr "mode" "OI")])
9833
 
9834
;; SSE4.1 widening of the low 2 byte elements of operand 1 (vec_select of
;; indices 0..1 from a V16QI) to 64-bit elements (V2DI).  any_extend is a
;; code iterator.  The Intel-syntax side prints operand 1 with the %w
;; modifier.
;; NOTE(review): the pattern name and mnemonic appear to have lost their
;; iterator-dependent parts in this listing -- confirm against upstream.
(define_insn "sse4_1_v2qiv2di2"
9835
  [(set (match_operand:V2DI 0 "register_operand" "=x")
9836
        (any_extend:V2DI
9837
          (vec_select:V2QI
9838
            (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9839
            (parallel [(const_int 0)
9840
                       (const_int 1)]))))]
9841
  "TARGET_SSE4_1"
9842
  "%vpmovbq\t{%1, %0|%0, %w1}"
9843
  [(set_attr "type" "ssemov")
9844
   (set_attr "prefix_extra" "1")
9845
   (set_attr "prefix" "maybe_vex")
9846
   (set_attr "mode" "TI")])
9847
 
9848
;; AVX2 widening of the low 4 halfword elements of operand 1 (vec_select of
;; indices 0..3 from a V8HI) to 64-bit elements (V4DI).  any_extend is a
;; code iterator.  The Intel-syntax side prints operand 1 with the %q
;; modifier.
;; NOTE(review): the pattern name and mnemonic appear to have lost their
;; iterator-dependent parts in this listing -- confirm against upstream.
(define_insn "avx2_v4hiv4di2"
9849
  [(set (match_operand:V4DI 0 "register_operand" "=x")
9850
        (any_extend:V4DI
9851
          (vec_select:V4HI
9852
            (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9853
            (parallel [(const_int 0)
9854
                       (const_int 1)
9855
                       (const_int 2)
9856
                       (const_int 3)]))))]
9857
  "TARGET_AVX2"
9858
  "vpmovwq\t{%1, %0|%0, %q1}"
9859
  [(set_attr "type" "ssemov")
9860
   (set_attr "prefix_extra" "1")
9861
   (set_attr "prefix" "vex")
9862
   (set_attr "mode" "OI")])
9863
 
9864
;; SSE4.1 widening of the low 2 halfword elements of operand 1 (vec_select
;; of indices 0..1 from a V8HI) to 64-bit elements (V2DI).  any_extend is a
;; code iterator.  The Intel-syntax side prints operand 1 with the %k
;; modifier.
;; NOTE(review): the pattern name and mnemonic appear to have lost their
;; iterator-dependent parts in this listing -- confirm against upstream.
(define_insn "sse4_1_v2hiv2di2"
9865
  [(set (match_operand:V2DI 0 "register_operand" "=x")
9866
        (any_extend:V2DI
9867
          (vec_select:V2HI
9868
            (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9869
            (parallel [(const_int 0)
9870
                       (const_int 1)]))))]
9871
  "TARGET_SSE4_1"
9872
  "%vpmovwq\t{%1, %0|%0, %k1}"
9873
  [(set_attr "type" "ssemov")
9874
   (set_attr "prefix_extra" "1")
9875
   (set_attr "prefix" "maybe_vex")
9876
   (set_attr "mode" "TI")])
9877
 
9878
;; AVX2 widening of all 4 word elements of operand 1 (V4SI) to 64-bit
;; elements (V4DI).  any_extend is a code iterator.
;; The "prefix" attribute is set to "vex" explicitly, like the other avx2_*
;; extension patterns next to this one, instead of being left to the
;; attribute's default.
;; NOTE(review): the pattern name and mnemonic appear to have lost their
;; iterator-dependent parts in this listing -- confirm against upstream.
(define_insn "avx2_v4siv4di2"
9879
  [(set (match_operand:V4DI 0 "register_operand" "=x")
9880
        (any_extend:V4DI
9881
            (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
9882
  "TARGET_AVX2"
9883
  "vpmovdq\t{%1, %0|%0, %1}"
9884
  [(set_attr "type" "ssemov")
9885
   (set_attr "prefix_extra" "1")
9886
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
9887
 
9888
;; SSE4.1 widening of the low 2 word elements of operand 1 (vec_select of
;; indices 0..1 from a V4SI) to 64-bit elements (V2DI).  any_extend is a
;; code iterator.  The Intel-syntax side prints operand 1 with the %q
;; modifier.
;; NOTE(review): the pattern name and mnemonic appear to have lost their
;; iterator-dependent parts in this listing -- confirm against upstream.
(define_insn "sse4_1_v2siv2di2"
9889
  [(set (match_operand:V2DI 0 "register_operand" "=x")
9890
        (any_extend:V2DI
9891
          (vec_select:V2SI
9892
            (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9893
            (parallel [(const_int 0)
9894
                       (const_int 1)]))))]
9895
  "TARGET_SSE4_1"
9896
  "%vpmovdq\t{%1, %0|%0, %q1}"
9897
  [(set_attr "type" "ssemov")
9898
   (set_attr "prefix_extra" "1")
9899
   (set_attr "prefix" "maybe_vex")
9900
   (set_attr "mode" "TI")])
9901
 
9902
;; vtestps/vtestpd are very similar to comiss and ucomiss in that they set
9903
;; FLAGS_REG, but they are not really compare instructions.
;; The only output is (reg:CC FLAGS_REG); the two vector inputs are wrapped
;; in UNSPEC_VTESTP.
;; NOTE(review): the pattern name, mnemonic suffix and "mode" attribute look
;; truncated in this listing -- confirm against upstream.
9904
(define_insn "avx_vtest"
9905
  [(set (reg:CC FLAGS_REG)
9906
        (unspec:CC [(match_operand:VF 0 "register_operand" "x")
9907
                    (match_operand:VF 1 "nonimmediate_operand" "xm")]
9908
                   UNSPEC_VTESTP))]
9909
  "TARGET_AVX"
9910
  "vtest\t{%1, %0|%0, %1}"
9911
  [(set_attr "type" "ssecomi")
9912
   (set_attr "prefix_extra" "1")
9913
   (set_attr "prefix" "vex")
9914
   (set_attr "mode" "")])
9915
 
9916
;; ptest is very similar to comiss and ucomiss in that it sets FLAGS_REG,
9917
;; but it is not really a compare instruction.
;; This is the 256-bit (V4DI) form; the only output is (reg:CC FLAGS_REG).
9918
(define_insn "avx_ptest256"
9919
  [(set (reg:CC FLAGS_REG)
9920
        (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9921
                    (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9922
                   UNSPEC_PTEST))]
9923
  "TARGET_AVX"
9924
  "vptest\t{%1, %0|%0, %1}"
9925
  [(set_attr "type" "ssecomi")
9926
   (set_attr "prefix_extra" "1")
9927
   (set_attr "prefix" "vex")
9928
   (set_attr "mode" "OI")])
9929
 
9930
;; 128-bit (V2DI) ptest.  The only output is (reg:CC FLAGS_REG); the two
;; vector inputs are wrapped in UNSPEC_PTEST.  One "%v" template with prefix
;; "maybe_vex" covers the legacy and the VEX spelling.
(define_insn "sse4_1_ptest"
9931
  [(set (reg:CC FLAGS_REG)
9932
        (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9933
                    (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9934
                   UNSPEC_PTEST))]
9935
  "TARGET_SSE4_1"
9936
  "%vptest\t{%1, %0|%0, %1}"
9937
  [(set_attr "type" "ssecomi")
9938
   (set_attr "prefix_extra" "1")
9939
   (set_attr "prefix" "maybe_vex")
9940
   (set_attr "mode" "TI")])
9941
 
9942
;; Packed floating-point round: UNSPEC_ROUND of operand 1 with a rounding
;; control immediate (operand 2, range 0..15).  Enabled by TARGET_ROUND.
;; prefix_data16 is computed rather than constant: "*" (default) when
;; compiling for AVX, "1" otherwise.
;; NOTE(review): the pattern name, mnemonic suffix and "mode" attribute look
;; truncated in this listing -- confirm against upstream.
(define_insn "_round"
9943
  [(set (match_operand:VF 0 "register_operand" "=x")
9944
        (unspec:VF
9945
          [(match_operand:VF 1 "nonimmediate_operand" "xm")
9946
           (match_operand:SI 2 "const_0_to_15_operand" "n")]
9947
          UNSPEC_ROUND))]
9948
  "TARGET_ROUND"
9949
  "%vround\t{%2, %1, %0|%0, %1, %2}"
9950
  [(set_attr "type" "ssecvt")
9951
   (set (attr "prefix_data16")
9952
     (if_then_else
9953
       (match_test "TARGET_AVX")
9954
     (const_string "*")
9955
     (const_string "1")))
9956
   (set_attr "prefix_extra" "1")
9957
   (set_attr "length_immediate" "1")
9958
   (set_attr "prefix" "maybe_vex")
9959
   (set_attr "mode" "")])
9960
 
9961
;; Round-then-convert expander for single-precision vectors (iterator VF1).
;; It rounds operand 1 with the rounding control in operand 2 into a fresh
;; pseudo, then converts that pseudo to operand 0 with a fix_trunc pattern.
;; The expander emits both insns itself and finishes with DONE, so the RTL
;; template only declares the operands.
;; NOTE(review): the pattern name, operand 0's mode and the generator names
;; called in the body look truncated in this listing -- confirm against
;; upstream.
(define_expand "_round_sfix"
9962
  [(match_operand: 0 "register_operand" "")
9963
   (match_operand:VF1 1 "nonimmediate_operand" "")
9964
   (match_operand:SI 2 "const_0_to_15_operand" "")]
9965
  "TARGET_ROUND"
9966
{
9967
  rtx tmp = gen_reg_rtx (mode);
9968
 
9969
  emit_insn
9970
    (gen__round (tmp, operands[1],
9971
                                                       operands[2]));
9972
  emit_insn
9973
    (gen_fix_trunc2 (operands[0], tmp));
9974
  DONE;
9975
})
9976
 
9977
;; Round two double-precision vectors (iterator VF2) with the rounding
;; control in operand 3, convert to integers and pack into operand 0.
;; Two strategies:
;;  - V2DF inputs with AVX available and 128-bit AVX not preferred: the two
;;    inputs are concatenated into one V4DF, rounded once with
;;    gen_avx_roundpd256 and converted with gen_fix_truncv4dfv4si2.
;;  - otherwise: each input is rounded separately into its own pseudo and
;;    the pair is handed to a vec_pack_sfix_trunc pattern.
;; The expander emits everything itself and finishes with DONE.
;; NOTE(review): the pattern name, operand 0's mode and several generator
;; names in the body look truncated in this listing -- confirm against
;; upstream.
(define_expand "_round_vec_pack_sfix"
9978
  [(match_operand: 0 "register_operand" "")
9979
   (match_operand:VF2 1 "nonimmediate_operand" "")
9980
   (match_operand:VF2 2 "nonimmediate_operand" "")
9981
   (match_operand:SI 3 "const_0_to_15_operand" "")]
9982
  "TARGET_ROUND"
9983
{
9984
  rtx tmp0, tmp1;
9985
 
9986
  if (mode == V2DFmode
9987
      && TARGET_AVX && !TARGET_PREFER_AVX128)
9988
    {
9989
      rtx tmp2 = gen_reg_rtx (V4DFmode);
9990
 
9991
      tmp0 = gen_reg_rtx (V4DFmode);
9992
      tmp1 = force_reg (V2DFmode, operands[1]);
9993
 
9994
      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
9995
      emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
9996
      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
9997
    }
9998
  else
9999
    {
10000
      tmp0 = gen_reg_rtx (mode);
10001
      tmp1 = gen_reg_rtx (mode);
10002
 
10003
      emit_insn
10004
       (gen__round (tmp0, operands[1],
10005
                                                          operands[3]));
10006
      emit_insn
10007
       (gen__round (tmp1, operands[2],
10008
                                                          operands[3]));
10009
      emit_insn
10010
       (gen_vec_pack_sfix_trunc_ (operands[0], tmp0, tmp1));
10011
    }
10012
  DONE;
10013
})
10014
 
10015
;; Scalar round on 128-bit vectors (iterator VF_128).  The vec_merge mask is
;; (const_int 1): element 0 of the result is the UNSPEC_ROUND of operand 2
;; (rounding control in operand 3, range 0..15) and the remaining elements
;; come from operand 1.  Alternative 0 is the non-AVX form with operand 1
;; tied to the destination; alternative 1 is the VEX three-operand form.
;; NOTE(review): the pattern name, mnemonic suffix and "mode" attribute look
;; truncated in this listing -- confirm against upstream.
(define_insn "sse4_1_round"
10016
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
10017
        (vec_merge:VF_128
10018
          (unspec:VF_128
10019
            [(match_operand:VF_128 2 "register_operand" "x,x")
10020
             (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
10021
            UNSPEC_ROUND)
10022
          (match_operand:VF_128 1 "register_operand" "0,x")
10023
          (const_int 1)))]
10024
  "TARGET_ROUND"
10025
  "@
10026
   round\t{%3, %2, %0|%0, %2, %3}
10027
   vround\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10028
  [(set_attr "isa" "noavx,avx")
10029
   (set_attr "type" "ssecvt")
10030
   (set_attr "length_immediate" "1")
10031
   (set_attr "prefix_data16" "1,*")
10032
   (set_attr "prefix_extra" "1")
10033
   (set_attr "prefix" "orig,vex")
10034
   (set_attr "mode" "")])
10035
 
10036
;; Vector round() expander, only enabled when !flag_trapping_math.
;; It is built from two insns described by the RTL template:
;;   operand 4 = operand 1 + operand 3
;;   operand 0 = UNSPEC_ROUND (operand 4, operand 5)
;; The preparation code fills in the match_dup operands:
;;   operand 3 = copysign (nextafter (0.5, 0.0), operand 1), where the
;;               constant is 0.5 minus 2**(-p-1), p being the precision of
;;               the element format, broadcast to a vector and forced into
;;               a register;
;;   operand 4 = a fresh pseudo for the sum;
;;   operand 5 = ROUND_TRUNC.
;; NOTE(review): the pattern name, the mode arguments in the body and the
;; copysign generator name look truncated in this listing -- confirm against
;; upstream.
(define_expand "round2"
10037
  [(set (match_dup 4)
10038
        (plus:VF
10039
          (match_operand:VF 1 "register_operand" "")
10040
          (match_dup 3)))
10041
   (set (match_operand:VF 0 "register_operand" "")
10042
        (unspec:VF
10043
          [(match_dup 4) (match_dup 5)]
10044
          UNSPEC_ROUND))]
10045
  "TARGET_ROUND && !flag_trapping_math"
10046
{
10047
  enum machine_mode scalar_mode;
10048
  const struct real_format *fmt;
10049
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
10050
  rtx half, vec_half;
10051
 
10052
  scalar_mode = GET_MODE_INNER (mode);
10053
 
10054
  /* load nextafter (0.5, 0.0) */
10055
  fmt = REAL_MODE_FORMAT (scalar_mode);
10056
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
10057
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
10058
  half = const_double_from_real_value (pred_half, scalar_mode);
10059
 
10060
  vec_half = ix86_build_const_vector (mode, true, half);
10061
  vec_half = force_reg (mode, vec_half);
10062
 
10063
  operands[3] = gen_reg_rtx (mode);
10064
  emit_insn (gen_copysign3 (operands[3], vec_half, operands[1]));
10065
 
10066
  operands[4] = gen_reg_rtx (mode);
10067
  operands[5] = GEN_INT (ROUND_TRUNC);
10068
})
10069
 
10070
;; round()-then-convert expander for single-precision vectors (iterator
;; VF1), only enabled when !flag_trapping_math.  It rounds operand 1 into a
;; fresh pseudo via the round2 expander and converts that pseudo to
;; operand 0 with a fix_trunc pattern, then finishes with DONE.
;; NOTE(review): the pattern name, operand 0's mode and the generator names
;; called in the body look truncated in this listing -- confirm against
;; upstream.
(define_expand "round2_sfix"
10071
  [(match_operand: 0 "register_operand" "")
10072
   (match_operand:VF1 1 "register_operand" "")]
10073
  "TARGET_ROUND && !flag_trapping_math"
10074
{
10075
  rtx tmp = gen_reg_rtx (mode);
10076
 
10077
  emit_insn (gen_round2 (tmp, operands[1]));
10078
 
10079
  emit_insn
10080
    (gen_fix_trunc2 (operands[0], tmp));
10081
  DONE;
10082
})
10083
 
10084
;; round() two double-precision vectors (iterator VF2), convert to integers
;; and pack into operand 0.  Only enabled when !flag_trapping_math.
;; Two strategies:
;;  - V2DF inputs with AVX available and 128-bit AVX not preferred: the two
;;    inputs are concatenated into one V4DF, rounded once with
;;    gen_roundv4df2 and converted with gen_fix_truncv4dfv4si2.
;;  - otherwise: each input is rounded separately into its own pseudo and
;;    the pair is handed to a vec_pack_sfix_trunc pattern.
;; The expander emits everything itself and finishes with DONE.
;; NOTE(review): the pattern name, operand 0's mode and several generator
;; names in the body look truncated in this listing -- confirm against
;; upstream.
(define_expand "round2_vec_pack_sfix"
10085
  [(match_operand: 0 "register_operand" "")
10086
   (match_operand:VF2 1 "register_operand" "")
10087
   (match_operand:VF2 2 "register_operand" "")]
10088
  "TARGET_ROUND && !flag_trapping_math"
10089
{
10090
  rtx tmp0, tmp1;
10091
 
10092
  if (mode == V2DFmode
10093
      && TARGET_AVX && !TARGET_PREFER_AVX128)
10094
    {
10095
      rtx tmp2 = gen_reg_rtx (V4DFmode);
10096
 
10097
      tmp0 = gen_reg_rtx (V4DFmode);
10098
      tmp1 = force_reg (V2DFmode, operands[1]);
10099
 
10100
      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
10101
      emit_insn (gen_roundv4df2 (tmp2, tmp0));
10102
      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
10103
    }
10104
  else
10105
    {
10106
      tmp0 = gen_reg_rtx (mode);
10107
      tmp1 = gen_reg_rtx (mode);
10108
 
10109
      emit_insn (gen_round2 (tmp0, operands[1]));
10110
      emit_insn (gen_round2 (tmp1, operands[2]));
10111
 
10112
      emit_insn
10113
       (gen_vec_pack_sfix_trunc_ (operands[0], tmp0, tmp1));
10114
    }
10115
  DONE;
10116
})
10117
 
10118
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10119
;;
10120
;; Intel SSE4.2 string/text processing instructions
10121
;;
10122
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10123
 
10124
;; Combined pcmpestr pattern with all three possible results: an SImode
;; value in operand 0 (constraint "c"), a V16QI value in operand 1
;; (constraint "Yz") and FLAGS_REG.  Operands 3 and 5 are constrained to the
;; "a" and "d" registers; operand 6 is the 8-bit control immediate.
;; It is never output directly (template "#") and only matches while new
;; pseudos can still be created.  The split ("&& 1", i.e. unconditional)
;; looks at REG_UNUSED notes on the insn and emits only what is needed:
;;   - pcmpestri if operand 0 is used,
;;   - pcmpestrm if operand 1 is used,
;;   - the flags-only pattern if only FLAGS_REG is used,
;;   - a NOTE_INSN_DELETED if none of the three results is used.
(define_insn_and_split "sse4_2_pcmpestr"
10125
  [(set (match_operand:SI 0 "register_operand" "=c,c")
10126
        (unspec:SI
10127
          [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10128
           (match_operand:SI 3 "register_operand" "a,a")
10129
           (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
10130
           (match_operand:SI 5 "register_operand" "d,d")
10131
           (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
10132
          UNSPEC_PCMPESTR))
10133
   (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10134
        (unspec:V16QI
10135
          [(match_dup 2)
10136
           (match_dup 3)
10137
           (match_dup 4)
10138
           (match_dup 5)
10139
           (match_dup 6)]
10140
          UNSPEC_PCMPESTR))
10141
   (set (reg:CC FLAGS_REG)
10142
        (unspec:CC
10143
          [(match_dup 2)
10144
           (match_dup 3)
10145
           (match_dup 4)
10146
           (match_dup 5)
10147
           (match_dup 6)]
10148
          UNSPEC_PCMPESTR))]
10149
  "TARGET_SSE4_2
10150
   && can_create_pseudo_p ()"
10151
  "#"
10152
  "&& 1"
10153
  [(const_int 0)]
10154
{
10155
  int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10156
  int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10157
  int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10158
 
10159
  if (ecx)
10160
    emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
10161
                                     operands[3], operands[4],
10162
                                     operands[5], operands[6]));
10163
  if (xmm0)
10164
    emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
10165
                                     operands[3], operands[4],
10166
                                     operands[5], operands[6]));
10167
  if (flags && !(ecx || xmm0))
10168
    emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
10169
                                           operands[2], operands[3],
10170
                                           operands[4], operands[5],
10171
                                           operands[6]));
10172
  if (!(flags || ecx || xmm0))
10173
    emit_note (NOTE_INSN_DELETED);
10174
 
10175
  DONE;
10176
}
10177
  [(set_attr "type" "sselog")
10178
   (set_attr "prefix_data16" "1")
10179
   (set_attr "prefix_extra" "1")
10180
   (set_attr "length_immediate" "1")
10181
   (set_attr "memory" "none,load")
10182
   (set_attr "mode" "TI")])
10183
 
10184
;; pcmpestri: produces an SImode result in operand 0 (constraint "c") and
;; sets FLAGS_REG from the same inputs.  Operands 2 and 4 are constrained to
;; the "a" and "d" registers and are not printed in the template; only the
;; two vector operands and the control immediate (operand 5) are.
;; Alternative 0 takes operand 3 from a register, alternative 1 from memory
;; (see the "memory" attribute).
(define_insn "sse4_2_pcmpestri"
10185
  [(set (match_operand:SI 0 "register_operand" "=c,c")
10186
        (unspec:SI
10187
          [(match_operand:V16QI 1 "register_operand" "x,x")
10188
           (match_operand:SI 2 "register_operand" "a,a")
10189
           (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10190
           (match_operand:SI 4 "register_operand" "d,d")
10191
           (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10192
          UNSPEC_PCMPESTR))
10193
   (set (reg:CC FLAGS_REG)
10194
        (unspec:CC
10195
          [(match_dup 1)
10196
           (match_dup 2)
10197
           (match_dup 3)
10198
           (match_dup 4)
10199
           (match_dup 5)]
10200
          UNSPEC_PCMPESTR))]
10201
  "TARGET_SSE4_2"
10202
  "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
10203
  [(set_attr "type" "sselog")
10204
   (set_attr "prefix_data16" "1")
10205
   (set_attr "prefix_extra" "1")
10206
   (set_attr "prefix" "maybe_vex")
10207
   (set_attr "length_immediate" "1")
10208
   (set_attr "memory" "none,load")
10209
   (set_attr "mode" "TI")])
10210
 
10211
;; pcmpestrm: produces a V16QI result in operand 0 (constraint "Yz") and
;; sets FLAGS_REG from the same inputs.  Operands 2 and 4 are constrained to
;; the "a" and "d" registers and are not printed in the template; only the
;; two vector operands and the control immediate (operand 5) are.
;; Alternative 0 takes operand 3 from a register, alternative 1 from memory
;; (see the "memory" attribute).
(define_insn "sse4_2_pcmpestrm"
10212
  [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10213
        (unspec:V16QI
10214
          [(match_operand:V16QI 1 "register_operand" "x,x")
10215
           (match_operand:SI 2 "register_operand" "a,a")
10216
           (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10217
           (match_operand:SI 4 "register_operand" "d,d")
10218
           (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10219
          UNSPEC_PCMPESTR))
10220
   (set (reg:CC FLAGS_REG)
10221
        (unspec:CC
10222
          [(match_dup 1)
10223
           (match_dup 2)
10224
           (match_dup 3)
10225
           (match_dup 4)
10226
           (match_dup 5)]
10227
          UNSPEC_PCMPESTR))]
10228
  "TARGET_SSE4_2"
10229
  "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
10230
  [(set_attr "type" "sselog")
10231
   (set_attr "prefix_data16" "1")
10232
   (set_attr "prefix_extra" "1")
10233
   (set_attr "length_immediate" "1")
10234
   (set_attr "prefix" "maybe_vex")
10235
   (set_attr "memory" "none,load")
10236
   (set_attr "mode" "TI")])
10237
 
10238
;; Flags-only pcmpestr: the only result kept is FLAGS_REG; the register the
;; chosen instruction would write is declared as a clobbered scratch.
;; Four alternatives:
;;   0,1 - emit pcmpestrm; scratch 0 is the "Yz" register, scratch 1 is "X"
;;         (not needed); operand 4 in a register / in memory.
;;   2,3 - emit pcmpestri; scratch 1 is the "c" register, scratch 0 is "X";
;;         operand 4 in a register / in memory.
;; Operands 3 and 5 are constrained to the "a" and "d" registers.
(define_insn "sse4_2_pcmpestr_cconly"
10239
  [(set (reg:CC FLAGS_REG)
10240
        (unspec:CC
10241
          [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10242
           (match_operand:SI 3 "register_operand" "a,a,a,a")
10243
           (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
10244
           (match_operand:SI 5 "register_operand" "d,d,d,d")
10245
           (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
10246
          UNSPEC_PCMPESTR))
10247
   (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10248
   (clobber (match_scratch:SI    1 "= X, X,c,c"))]
10249
  "TARGET_SSE4_2"
10250
  "@
10251
   %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10252
   %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10253
   %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
10254
   %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
10255
  [(set_attr "type" "sselog")
10256
   (set_attr "prefix_data16" "1")
10257
   (set_attr "prefix_extra" "1")
10258
   (set_attr "length_immediate" "1")
10259
   (set_attr "memory" "none,load,none,load")
10260
   (set_attr "prefix" "maybe_vex")
10261
   (set_attr "mode" "TI")])
10262
 
10263
;; Combined pcmpistr pattern with all three possible results: an SImode
;; value in operand 0 (constraint "c"), a V16QI value in operand 1
;; (constraint "Yz") and FLAGS_REG.  Operand 4 is the 8-bit control
;; immediate.
;; It is never output directly (template "#") and only matches while new
;; pseudos can still be created.  The split ("&& 1", i.e. unconditional)
;; looks at REG_UNUSED notes on the insn and emits only what is needed:
;;   - pcmpistri if operand 0 is used,
;;   - pcmpistrm if operand 1 is used,
;;   - the flags-only pattern if only FLAGS_REG is used,
;;   - a NOTE_INSN_DELETED if none of the three results is used.
(define_insn_and_split "sse4_2_pcmpistr"
10264
  [(set (match_operand:SI 0 "register_operand" "=c,c")
10265
        (unspec:SI
10266
          [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10267
           (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
10268
           (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
10269
          UNSPEC_PCMPISTR))
10270
   (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10271
        (unspec:V16QI
10272
          [(match_dup 2)
10273
           (match_dup 3)
10274
           (match_dup 4)]
10275
          UNSPEC_PCMPISTR))
10276
   (set (reg:CC FLAGS_REG)
10277
        (unspec:CC
10278
          [(match_dup 2)
10279
           (match_dup 3)
10280
           (match_dup 4)]
10281
          UNSPEC_PCMPISTR))]
10282
  "TARGET_SSE4_2
10283
   && can_create_pseudo_p ()"
10284
  "#"
10285
  "&& 1"
10286
  [(const_int 0)]
10287
{
10288
  int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10289
  int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10290
  int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10291
 
10292
  if (ecx)
10293
    emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
10294
                                     operands[3], operands[4]));
10295
  if (xmm0)
10296
    emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
10297
                                     operands[3], operands[4]));
10298
  if (flags && !(ecx || xmm0))
10299
    emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
10300
                                           operands[2], operands[3],
10301
                                           operands[4]));
10302
  if (!(flags || ecx || xmm0))
10303
    emit_note (NOTE_INSN_DELETED);
10304
 
10305
  DONE;
10306
}
10307
  [(set_attr "type" "sselog")
10308
   (set_attr "prefix_data16" "1")
10309
   (set_attr "prefix_extra" "1")
10310
   (set_attr "length_immediate" "1")
10311
   (set_attr "memory" "none,load")
10312
   (set_attr "mode" "TI")])
10313
 
10314
;; pcmpistri: produces an SImode result in operand 0 (constraint "c") and
;; sets FLAGS_REG from the same inputs.  Operand 3 is the 8-bit control
;; immediate.  Alternative 0 takes operand 2 from a register, alternative 1
;; from memory (see the "memory" attribute).
(define_insn "sse4_2_pcmpistri"
10315
  [(set (match_operand:SI 0 "register_operand" "=c,c")
10316
        (unspec:SI
10317
          [(match_operand:V16QI 1 "register_operand" "x,x")
10318
           (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10319
           (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10320
          UNSPEC_PCMPISTR))
10321
   (set (reg:CC FLAGS_REG)
10322
        (unspec:CC
10323
          [(match_dup 1)
10324
           (match_dup 2)
10325
           (match_dup 3)]
10326
          UNSPEC_PCMPISTR))]
10327
  "TARGET_SSE4_2"
10328
  "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10329
  [(set_attr "type" "sselog")
10330
   (set_attr "prefix_data16" "1")
10331
   (set_attr "prefix_extra" "1")
10332
   (set_attr "length_immediate" "1")
10333
   (set_attr "prefix" "maybe_vex")
10334
   (set_attr "memory" "none,load")
10335
   (set_attr "mode" "TI")])
10336
 
10337
;; pcmpistrm: produces a V16QI result in operand 0 (constraint "Yz") and
;; sets FLAGS_REG from the same inputs.  Operand 3 is the 8-bit control
;; immediate.  Alternative 0 takes operand 2 from a register, alternative 1
;; from memory (see the "memory" attribute).
(define_insn "sse4_2_pcmpistrm"
10338
  [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10339
        (unspec:V16QI
10340
          [(match_operand:V16QI 1 "register_operand" "x,x")
10341
           (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10342
           (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10343
          UNSPEC_PCMPISTR))
10344
   (set (reg:CC FLAGS_REG)
10345
        (unspec:CC
10346
          [(match_dup 1)
10347
           (match_dup 2)
10348
           (match_dup 3)]
10349
          UNSPEC_PCMPISTR))]
10350
  "TARGET_SSE4_2"
10351
  "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10352
  [(set_attr "type" "sselog")
10353
   (set_attr "prefix_data16" "1")
10354
   (set_attr "prefix_extra" "1")
10355
   (set_attr "length_immediate" "1")
10356
   (set_attr "prefix" "maybe_vex")
10357
   (set_attr "memory" "none,load")
10358
   (set_attr "mode" "TI")])
10359
 
10360
;; Flags-only pcmpistr: the only result kept is FLAGS_REG; the register the
;; chosen instruction would write is declared as a clobbered scratch.
;; Four alternatives:
;;   0,1 - emit pcmpistrm; scratch 0 is the "Yz" register, scratch 1 is "X"
;;         (not needed); operand 3 in a register / in memory.
;;   2,3 - emit pcmpistri; scratch 1 is the "c" register, scratch 0 is "X";
;;         operand 3 in a register / in memory.
(define_insn "sse4_2_pcmpistr_cconly"
10361
  [(set (reg:CC FLAGS_REG)
10362
        (unspec:CC
10363
          [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10364
           (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10365
           (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10366
          UNSPEC_PCMPISTR))
10367
   (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10368
   (clobber (match_scratch:SI    1 "= X, X,c,c"))]
10369
  "TARGET_SSE4_2"
10370
  "@
10371
   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10372
   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10373
   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10374
   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10375
  [(set_attr "type" "sselog")
10376
   (set_attr "prefix_data16" "1")
10377
   (set_attr "prefix_extra" "1")
10378
   (set_attr "length_immediate" "1")
10379
   (set_attr "memory" "none,load,none,load")
10380
   (set_attr "prefix" "maybe_vex")
10381
   (set_attr "mode" "TI")])
10382
 
10383
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10384
;;
10385
;; XOP instructions
10386
;;
10387
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10388
 
10389
;; XOP parallel integer multiply/add instructions.
10390
;; Note the XOP multiply/add instructions
10391
;;     a[i] = b[i] * c[i] + d[i];
10392
;; do not allow the value being added to be a memory operation.
10393
;; vpmacsww: V8HI multiply of operands 1 and 2 followed by a plain
;; (non-saturating) add of operand 3.  Only operand 2 accepts memory.
(define_insn "xop_pmacsww"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (plus:V8HI
          (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                     (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
          (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10404
 
10405
;; vpmacssww: same shape as the V8HI multiply/add, but the accumulate
;; step is a signed-saturating add (ss_plus) of operand 3.
(define_insn "xop_pmacssww"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (ss_plus:V8HI
          (mult:V8HI
            (match_operand:V8HI 1 "nonimmediate_operand" "%x")
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
          (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10415
 
10416
;; vpmacsdd: V4SI multiply of operands 1 and 2 followed by a plain
;; (non-saturating) add of operand 3.  Only operand 2 accepts memory.
(define_insn "xop_pmacsdd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                     (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10427
 
10428
;; vpmacssdd: V4SI multiply of operands 1 and 2, then a
;; signed-saturating add (ss_plus) of operand 3.
(define_insn "xop_pmacssdd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (ss_plus:V4SI
          (mult:V4SI
            (match_operand:V4SI 1 "nonimmediate_operand" "%x")
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10438
 
10439
;; vpmacssdql: sign-extend two SI elements from each of operands 1 and 2
;; to DI, multiply them, and add operand 3 with signed saturation.
;; Both multiplicands must be widened to V2DI before the mult:V2DI; the
;; second one previously was a bare V2SI vec_select, which made the RTL
;; mode-inconsistent.
;; NOTE(review): the RTL selects elements 1 and 3; confirm that these
;; are the lanes vpmacssdql reads in hardware.
(define_insn "xop_pmacssdql"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (ss_plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 1) (const_int 3)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 1) (const_int 3)]))))
          (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10457
 
10458
;; vpmacssdqh: sign-extend elements 0 and 2 of operands 1 and 2 to DI,
;; multiply them, and add operand 3 with signed saturation (ss_plus).
;; NOTE(review): confirm that elements 0 and 2 are the lanes
;; vpmacssdqh reads in hardware.
(define_insn "xop_pmacssdqh"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (ss_plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 0) (const_int 2)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 2)]))))
          (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10477
 
10478
;; vpmacsdql: sign-extend elements 1 and 3 of operands 1 and 2 to DI,
;; multiply them, and add operand 3 with a plain (non-saturating) plus.
;; NOTE(review): confirm that elements 1 and 3 are the lanes
;; vpmacsdql reads in hardware.
(define_insn "xop_pmacsdql"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 1) (const_int 3)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 1) (const_int 3)]))))
          (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10497
 
10498
;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10499
;; fake it with a multiply/add.  In general, we expect the define_split to
10500
;; occur before register allocation, so we have to handle the corner case where
10501
;; the target is the same as operands 1/2
10502
;; Widening multiply of elements 1 and 3 of two V4SI operands.  It is
;; output as "#" and split once reload has completed into two insns:
;; operand 0 is first loaded with zero (operands[3], set up in the
;; preparation code below), then the multiply/add RTL is emitted with
;; operand 0 as the addend.  Operand 0 is earlyclobber ("=&x").
(define_insn_and_split "xop_mulv2div2di3_low"
  [(set (match_operand:V2DI 0 "register_operand" "=&x")
        (mult:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "register_operand" "%x")
              (parallel [(const_int 1) (const_int 3)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 1) (const_int 3)])))))]
  "TARGET_XOP"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 3))
   (set (match_dup 0)
        (plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 3)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_dup 2)
                (parallel [(const_int 1) (const_int 3)]))))
          (match_dup 0)))]
{
  /* The addend of the emitted multiply/add is an all-zero vector.  */
  operands[3] = CONST0_RTX (V2DImode);
}
  [(set_attr "type" "ssemul")
   (set_attr "mode" "TI")])
10539
 
10540
;; vpmacsdqh: sign-extend elements 0 and 2 of operands 1 and 2 to DI,
;; multiply them, and add operand 3 with a plain (non-saturating) plus.
;; NOTE(review): confirm that elements 0 and 2 are the lanes
;; vpmacsdqh reads in hardware.
(define_insn "xop_pmacsdqh"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 0) (const_int 2)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 2)]))))
          (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10559
 
10560
;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10561
;; fake it with a multiply/add.  In general, we expect the define_split to
10562
;; occur before register allocation, so we have to handle the corner case where
10563
;; the target is the same as either operands[1] or operands[2]
10564
;; Widening multiply of elements 0 and 2 of two V4SI operands.  It is
;; output as "#" and split once reload has completed into two insns:
;; operand 0 is first loaded with zero (operands[3], set up in the
;; preparation code below), then the multiply/add RTL is emitted with
;; operand 0 as the addend.  Operand 0 is earlyclobber ("=&x").
(define_insn_and_split "xop_mulv2div2di3_high"
  [(set (match_operand:V2DI 0 "register_operand" "=&x")
        (mult:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "register_operand" "%x")
              (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_XOP"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 3))
   (set (match_dup 0)
        (plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_dup 1)
                (parallel [(const_int 0) (const_int 2)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_dup 2)
                (parallel [(const_int 0) (const_int 2)]))))
          (match_dup 0)))]
{
  /* The addend of the emitted multiply/add is an all-zero vector.  */
  operands[3] = CONST0_RTX (V2DImode);
}
  [(set_attr "type" "ssemul")
   (set_attr "mode" "TI")])
10601
 
10602
;; XOP parallel integer multiply/add instructions for the intrinsics
10603
;; vpmacsswd: sign-extend the odd-indexed HI elements (1, 3, 5, 7) of
;; operands 1 and 2 to SI, multiply them, and add operand 3 with signed
;; saturation (ss_plus).
(define_insn "xop_pmacsswd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (ss_plus:V4SI
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10626
 
10627
;; vpmacswd: sign-extend the odd-indexed HI elements (1, 3, 5, 7) of
;; operands 1 and 2 to SI, multiply them, and add operand 3 with a
;; plain (non-saturating) plus.
(define_insn "xop_pmacswd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10650
 
10651
;; vpmadcsswd: form the widened products of the even-indexed HI
;; elements and of the odd-indexed HI elements of operands 1 and 2,
;; add the two product vectors, then add operand 3 with signed
;; saturation (ss_plus).
(define_insn "xop_pmadcsswd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (ss_plus:V4SI
          (plus:V4SI
            (mult:V4SI
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                  (parallel [(const_int 0) (const_int 2)
                             (const_int 4) (const_int 6)])))
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0) (const_int 2)
                             (const_int 4) (const_int 6)]))))
            (mult:V4SI
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_dup 1)
                  (parallel [(const_int 1) (const_int 3)
                             (const_int 5) (const_int 7)])))
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_dup 2)
                  (parallel [(const_int 1) (const_int 3)
                             (const_int 5) (const_int 7)])))))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10690
 
10691
;; vpmadcswd: form the widened products of the even-indexed HI elements
;; and of the odd-indexed HI elements of operands 1 and 2, add the two
;; product vectors, then add operand 3 with a plain plus.
(define_insn "xop_pmadcswd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (plus:V4SI
            (mult:V4SI
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                  (parallel [(const_int 0) (const_int 2)
                             (const_int 4) (const_int 6)])))
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0) (const_int 2)
                             (const_int 4) (const_int 6)]))))
            (mult:V4SI
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_dup 1)
                  (parallel [(const_int 1) (const_int 3)
                             (const_int 5) (const_int 7)])))
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_dup 2)
                  (parallel [(const_int 1) (const_int 3)
                             (const_int 5) (const_int 7)])))))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10730
 
10731
;; XOP parallel XMM conditional moves
10732
;; vpcmov: if_then_else of operands 1 and 2 under the selector in
;; operand 3.  At most one of operands 2 and 3 is a memory operand, as
;; the two constraint alternatives show.
;; The pattern is instantiated once per mode of the V iterator, so the
;; name must carry the <mode><avxsizesuffix> substitutions; without
;; them every instantiation would be given the same name.
(define_insn "xop_pcmov_<mode><avxsizesuffix>"
  [(set (match_operand:V 0 "register_operand" "=x,x")
        (if_then_else:V
          (match_operand:V 3 "nonimmediate_operand" "x,m")
          (match_operand:V 1 "register_operand" "x,x")
          (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
  "TARGET_XOP"
  "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")])
10741
 
10742
;; XOP horizontal add/subtract instructions
10743
;; vphaddbw: sign-extend the even-indexed and the odd-indexed QI
;; elements of operand 1 to HI and add the two vectors, i.e. each
;; result element is the sum of one adjacent byte pair.
(define_insn "xop_phaddbw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (plus:V8HI
          (sign_extend:V8HI
            (vec_select:V8QI
              (match_operand:V16QI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (sign_extend:V8HI
            (vec_select:V8QI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)
                         (const_int 9) (const_int 11)
                         (const_int 13) (const_int 15)])))))]
  "TARGET_XOP"
  "vphaddbw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10771
 
10772
;; vphaddbd: each SI result element is the sum of four sign-extended
;; QI elements of operand 1 (indices 4k, 4k+1, 4k+2, 4k+3).
(define_insn "xop_phaddbd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (plus:V4SI
            (sign_extend:V4SI
              (vec_select:V4QI
                (match_operand:V16QI 1 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 4)
                           (const_int 8) (const_int 12)])))
            (sign_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 5)
                           (const_int 9) (const_int 13)]))))
          (plus:V4SI
            (sign_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 2) (const_int 6)
                           (const_int 10) (const_int 14)])))
            (sign_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 3) (const_int 7)
                           (const_int 11) (const_int 15)]))))))]
  "TARGET_XOP"
  "vphaddbd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10808
 
10809
;; vphaddbq: each DI result element is the sum of eight successive
;; sign-extended QI elements of operand 1: element 0 sums bytes 0-7 and
;; element 1 sums bytes 8-15.  Every two-element selector therefore
;; pairs byte i with byte i + 8.  (The selectors used to pair i with
;; i + 4, which mixed bytes of both halves into each result element.)
(define_insn "xop_phaddbq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (plus:V2DI
            (plus:V2DI
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_operand:V16QI 1 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0) (const_int 8)])))
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 1) (const_int 9)]))))
            (plus:V2DI
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 2) (const_int 10)])))
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 3) (const_int 11)])))))
          (plus:V2DI
            (plus:V2DI
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 4) (const_int 12)])))
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 5) (const_int 13)]))))
            (plus:V2DI
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 6) (const_int 14)])))
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 7) (const_int 15)])))))))]
  "TARGET_XOP"
  "vphaddbq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10861
 
10862
;; vphaddwd: sign-extend the even-indexed and the odd-indexed HI
;; elements of operand 1 to SI and add the two vectors.
(define_insn "xop_phaddwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (sign_extend:V4SI
            (vec_select:V4HI
              (match_operand:V8HI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (sign_extend:V4SI
            (vec_select:V4HI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)])))))]
  "TARGET_XOP"
  "vphaddwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10882
 
10883
;; vphaddwq: each DI result element is the sum of four sign-extended
;; HI elements of operand 1 (indices 4k, 4k+1, 4k+2, 4k+3).
(define_insn "xop_phaddwq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (plus:V2DI
            (sign_extend:V2DI
              (vec_select:V2HI
                (match_operand:V8HI 1 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 4)])))
            (sign_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 5)]))))
          (plus:V2DI
            (sign_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 2) (const_int 6)])))
            (sign_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 3) (const_int 7)]))))))]
  "TARGET_XOP"
  "vphaddwq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10911
 
10912
;; vphadddq: sign-extend elements {0, 2} and {1, 3} of operand 1 to DI
;; and add the two vectors.
(define_insn "xop_phadddq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)])))))]
  "TARGET_XOP"
  "vphadddq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10928
 
10929
;; vphaddubw: zero-extend the even-indexed and the odd-indexed QI
;; elements of operand 1 to HI and add the two vectors.
(define_insn "xop_phaddubw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (plus:V8HI
          (zero_extend:V8HI
            (vec_select:V8QI
              (match_operand:V16QI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (zero_extend:V8HI
            (vec_select:V8QI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)
                         (const_int 9) (const_int 11)
                         (const_int 13) (const_int 15)])))))]
  "TARGET_XOP"
  "vphaddubw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10957
 
10958
;; vphaddubd: each SI result element is the sum of four zero-extended
;; QI elements of operand 1 (indices 4k, 4k+1, 4k+2, 4k+3).
(define_insn "xop_phaddubd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (plus:V4SI
            (zero_extend:V4SI
              (vec_select:V4QI
                (match_operand:V16QI 1 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 4)
                           (const_int 8) (const_int 12)])))
            (zero_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 5)
                           (const_int 9) (const_int 13)]))))
          (plus:V4SI
            (zero_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 2) (const_int 6)
                           (const_int 10) (const_int 14)])))
            (zero_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 3) (const_int 7)
                           (const_int 11) (const_int 15)]))))))]
  "TARGET_XOP"
  "vphaddubd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10994
 
10995
;; vphaddubq: each DI result element is the sum of eight successive
;; zero-extended QI elements of operand 1: element 0 sums bytes 0-7 and
;; element 1 sums bytes 8-15.  All eight terms are zero_extend (two of
;; them were sign_extend, contradicting the unsigned operation), and
;; every selector pairs byte i with byte i + 8 (they used to pair i
;; with i + 4, mixing bytes of both halves into each result element).
(define_insn "xop_phaddubq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (plus:V2DI
            (plus:V2DI
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_operand:V16QI 1 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0) (const_int 8)])))
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 1) (const_int 9)]))))
            (plus:V2DI
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 2) (const_int 10)])))
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 3) (const_int 11)])))))
          (plus:V2DI
            (plus:V2DI
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 4) (const_int 12)])))
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 5) (const_int 13)]))))
            (plus:V2DI
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 6) (const_int 14)])))
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 7) (const_int 15)])))))))]
  "TARGET_XOP"
  "vphaddubq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
11047
 
11048
;; vphadduwd: zero-extend the even-indexed and the odd-indexed HI
;; elements of operand 1 to SI and add the two vectors.
(define_insn "xop_phadduwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (zero_extend:V4SI
            (vec_select:V4HI
              (match_operand:V8HI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (zero_extend:V4SI
            (vec_select:V4HI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)])))))]
  "TARGET_XOP"
  "vphadduwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
11068
 
11069
;; vphadduwq: each DI result element is the sum of four zero-extended
;; HI elements of operand 1 (indices 4k, 4k+1, 4k+2, 4k+3).
(define_insn "xop_phadduwq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (plus:V2DI
            (zero_extend:V2DI
              (vec_select:V2HI
                (match_operand:V8HI 1 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 4)])))
            (zero_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 5)]))))
          (plus:V2DI
            (zero_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 2) (const_int 6)])))
            (zero_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 3) (const_int 7)]))))))]
  "TARGET_XOP"
  "vphadduwq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
11097
 
11098
;; vphaddudq: zero-extend elements {0, 2} and {1, 3} of operand 1 to DI
;; and add the two vectors.
(define_insn "xop_phaddudq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)])))))]
  "TARGET_XOP"
  "vphaddudq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
11114
 
11115
;; vphsubbw: sign-extend the QI elements of operand 1 to HI and
;; subtract the odd-indexed vector from the even-indexed vector.
(define_insn "xop_phsubbw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (minus:V8HI
          (sign_extend:V8HI
            (vec_select:V8QI
              (match_operand:V16QI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (sign_extend:V8HI
            (vec_select:V8QI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)
                         (const_int 9) (const_int 11)
                         (const_int 13) (const_int 15)])))))]
  "TARGET_XOP"
  "vphsubbw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
11143
 
11144
;; vphsubwd: sign-extend the HI elements of operand 1 to SI and
;; subtract the odd-indexed vector from the even-indexed vector.
(define_insn "xop_phsubwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (minus:V4SI
          (sign_extend:V4SI
            (vec_select:V4HI
              (match_operand:V8HI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (sign_extend:V4SI
            (vec_select:V4HI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)])))))]
  "TARGET_XOP"
  "vphsubwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
11164
 
11165
;; vphsubdq: sign-extend the SI elements of operand 1 to DI and
;; subtract elements {1, 3} from elements {0, 2}.
(define_insn "xop_phsubdq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (minus:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)])))))]
  "TARGET_XOP"
  "vphsubdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
11181
 
11182
;; XOP permute instructions
11183
;; vpperm: opaque byte permute, modelled as UNSPEC_XOP_PERMUTE over
;; operands 1, 2 and 3.  The insn condition rejects the case where
;; operands 2 and 3 are both memory references.
(define_insn "xop_pperm"
  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x,x")
                       (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
                       (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
                      UNSPEC_XOP_PERMUTE))]
  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")
   (set_attr "mode" "TI")])
11194
 
11195
;; XOP pack instructions that combine two vectors into a smaller vector
11196
;; Pack two V2DI vectors into one V4SI: the result is the concatenation
;; of the truncated operands 1 and 2.  Operand 3 appears only in a
;; (use ...) and is passed to vpperm as its selector operand.  The
;; condition rejects operands 2 and 3 both being memory.
(define_insn "xop_pperm_pack_v2di_v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
        (vec_concat:V4SI
          (truncate:V2SI (match_operand:V2DI 1 "register_operand" "x,x"))
          (truncate:V2SI (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")
   (set_attr "mode" "TI")])
11208
 
11209
;; V4SI x V4SI -> V8HI pack: each input is truncated element-wise to
;; V4HI and the two halves are concatenated.  Operand 3 appears only in
;; a (use ...) and is handed to vpperm as its first source operand.
;; Operands 2 and 3 may not both be memory.
(define_insn "xop_pperm_pack_v4si_v8hi"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_concat:V8HI
         (truncate:V4HI
          (match_operand:V4SI 1 "register_operand" "x,x"))
         (truncate:V4HI
          (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")
   (set_attr "mode" "TI")])
11221
 
11222
;; V8HI x V8HI -> V16QI pack: each input is truncated element-wise to
;; V8QI and the two halves are concatenated.  Operand 3 appears only in
;; a (use ...) and is handed to vpperm as its first source operand.
;; Operands 2 and 3 may not both be memory.
(define_insn "xop_pperm_pack_v8hi_v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
        (vec_concat:V16QI
         (truncate:V8QI
          (match_operand:V8HI 1 "register_operand" "x,x"))
         (truncate:V8QI
          (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")
   (set_attr "mode" "TI")])
11234
 
11235
;; XOP packed rotate instructions
;; Rotate-left expander for the 128-bit integer vector modes, with a
;; scalar SImode count (operand 2).  If the count satisfies the
;; immediate predicate, nothing is emitted here and the pattern itself
;; is generated.  Otherwise the count is converted to the element mode,
;; broadcast into a vector register, and the per-element variable
;; rotate (xop_vrotl<mode>3) is emitted instead.
(define_expand "rotl<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "")
        (rotate:VI_128
         (match_operand:VI_128 1 "nonimmediate_operand" "")
         (match_operand:SI 2 "general_operand")))]
  "TARGET_XOP"
{
  /* If we were given a scalar, convert it to parallel */
  if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
    {
      rtvec vs = rtvec_alloc (<ssescalarnum>);
      rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
      rtx reg = gen_reg_rtx (<MODE>mode);
      rtx op2 = operands[2];
      int i;

      /* The broadcast elements must be in the vector's element mode.  */
      if (GET_MODE (op2) != <ssescalarmode>mode)
        {
          op2 = gen_reg_rtx (<ssescalarmode>mode);
          convert_move (op2, operands[2], false);
        }

      /* Every lane gets the same count.  */
      for (i = 0; i < <ssescalarnum>; i++)
        RTVEC_ELT (vs, i) = op2;

      emit_insn (gen_vec_init<mode> (reg, par));
      emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
      DONE;
    }
})
11266
 
11267
;; Rotate-right expander for the 128-bit integer vector modes, with a
;; scalar SImode count (operand 2).  If the count satisfies the
;; immediate predicate, nothing is emitted here and the pattern itself
;; is generated.  Otherwise the count is converted to the element mode,
;; broadcast into a vector, negated, and fed to xop_vrotl<mode>3, whose
;; RTL rotates right for negative per-element counts.
(define_expand "rotr<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "")
        (rotatert:VI_128
         (match_operand:VI_128 1 "nonimmediate_operand" "")
         (match_operand:SI 2 "general_operand")))]
  "TARGET_XOP"
{
  /* If we were given a scalar, convert it to parallel */
  if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
    {
      rtvec vs = rtvec_alloc (<ssescalarnum>);
      rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
      rtx neg = gen_reg_rtx (<MODE>mode);
      rtx reg = gen_reg_rtx (<MODE>mode);
      rtx op2 = operands[2];
      int i;

      /* The broadcast elements must be in the vector's element mode.  */
      if (GET_MODE (op2) != <ssescalarmode>mode)
        {
          op2 = gen_reg_rtx (<ssescalarmode>mode);
          convert_move (op2, operands[2], false);
        }

      /* Every lane gets the same count.  */
      for (i = 0; i < <ssescalarnum>; i++)
        RTVEC_ELT (vs, i) = op2;

      emit_insn (gen_vec_init<mode> (reg, par));
      /* Negative counts select the rotate-right arm of xop_vrotl.  */
      emit_insn (gen_neg<mode>2 (neg, reg));
      emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
      DONE;
    }
})
11299
 
11300
;; Rotate left every element of operand 1 by the immediate count in
;; operand 2.  The count predicate is chosen per mode so the immediate
;; stays below the element width.
(define_insn "xop_rotl<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x")
        (rotate:VI_128
         (match_operand:VI_128 1 "nonimmediate_operand" "xm")
         (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
  "TARGET_XOP"
  "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
11310
 
11311
;; Rotate right every element of operand 1 by the immediate count in
;; operand 2.  The output code emits a left rotate by
;; (element width in bits - count), stored in operands[3].
;;
;; The complement is taken against the element bit width.  Multiplying
;; the element count by 8 is not the element width in general (for V2DI
;; it yields 16 rather than 64), which produced a wrong rotate amount.
(define_insn "xop_rotr<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x")
        (rotatert:VI_128
         (match_operand:VI_128 1 "nonimmediate_operand" "xm")
         (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
  "TARGET_XOP"
{
  operands[3]
    = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
  return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
}
  [(set_attr "type" "sseishft")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
11324
 
11325
;; Per-element variable rotate right: negate the count vector
;; (operand 2) and emit xop_vrotl<mode>3, whose RTL rotates right for
;; negative counts.
(define_expand "vrotr<mode>3"
  [(match_operand:VI_128 0 "register_operand" "")
   (match_operand:VI_128 1 "register_operand" "")
   (match_operand:VI_128 2 "register_operand" "")]
  "TARGET_XOP"
{
  rtx reg = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neg<mode>2 (reg, operands[2]));
  emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
  DONE;
})
11336
 
11337
;; Per-element variable rotate left: forwards all three operands
;; unchanged to xop_vrotl<mode>3.
(define_expand "vrotl<mode>3"
  [(match_operand:VI_128 0 "register_operand" "")
   (match_operand:VI_128 1 "register_operand" "")
   (match_operand:VI_128 2 "register_operand" "")]
  "TARGET_XOP"
{
  emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
  DONE;
})
11346
 
11347
;; Per-element variable rotate.  Where the count in operand 2 is >= 0,
;; operand 1 is rotated left by it; otherwise it is rotated right by the
;; negated count.  Operands 1 and 2 may not both be memory.
(define_insn "xop_vrotl<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
        (if_then_else:VI_128
         (ge:VI_128
          (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
          (const_int 0))
         (rotate:VI_128
          (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
          (match_dup 2))
         (rotatert:VI_128
          (match_dup 1)
          (neg:VI_128 (match_dup 2)))))]
  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "mode" "TI")])
11365
 
11366
;; XOP packed shift instructions.
;; Per-element logical shift right for the byte/word 128-bit modes.
;; The count vector is negated and passed to xop_shl<mode>3, whose RTL
;; performs a logical right shift for negative counts.
(define_expand "vlshr<mode>3"
  [(set (match_operand:VI12_128 0 "register_operand" "")
        (lshiftrt:VI12_128
          (match_operand:VI12_128 1 "register_operand" "")
          (match_operand:VI12_128 2 "nonimmediate_operand" "")))]
  "TARGET_XOP"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
  DONE;
})
11379
 
11380
;; Per-element logical shift right for the dword/qword 128-bit modes.
;; With AVX2 nothing is emitted here and the lshiftrt pattern itself is
;; generated.  Without AVX2 (XOP only) the count vector is negated and
;; passed to xop_shl<mode>3.
(define_expand "vlshr<mode>3"
  [(set (match_operand:VI48_128 0 "register_operand" "")
        (lshiftrt:VI48_128
          (match_operand:VI48_128 1 "register_operand" "")
          (match_operand:VI48_128 2 "nonimmediate_operand" "")))]
  "TARGET_AVX2 || TARGET_XOP"
{
  if (!TARGET_AVX2)
    {
      rtx neg = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_neg<mode>2 (neg, operands[2]));
      emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
      DONE;
    }
})
11395
 
11396
;; Per-element logical shift right for the dword/qword 256-bit modes.
;; AVX2 only; there is no preparation code, so the lshiftrt pattern is
;; generated as written.
(define_expand "vlshr<mode>3"
  [(set (match_operand:VI48_256 0 "register_operand" "")
        (lshiftrt:VI48_256
          (match_operand:VI48_256 1 "register_operand" "")
          (match_operand:VI48_256 2 "nonimmediate_operand" "")))]
  "TARGET_AVX2")
11402
 
11403
;; Per-element arithmetic shift right for the VI128_128 modes (XOP).
;; The count vector is negated and passed to xop_sha<mode>3, whose RTL
;; performs an arithmetic right shift for negative counts.
(define_expand "vashr<mode>3"
  [(set (match_operand:VI128_128 0 "register_operand" "")
        (ashiftrt:VI128_128
          (match_operand:VI128_128 1 "register_operand" "")
          (match_operand:VI128_128 2 "nonimmediate_operand" "")))]
  "TARGET_XOP"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
  DONE;
})
11415
 
11416
;; Per-element arithmetic shift right, V4SI.  With AVX2 nothing is
;; emitted here and the ashiftrt pattern itself is generated.  Without
;; AVX2 (XOP only) the count vector is negated and passed to
;; xop_shav4si3.
(define_expand "vashrv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "")
                       (match_operand:V4SI 2 "nonimmediate_operand" "")))]
  "TARGET_AVX2 || TARGET_XOP"
{
  if (!TARGET_AVX2)
    {
      rtx neg = gen_reg_rtx (V4SImode);
      emit_insn (gen_negv4si2 (neg, operands[2]));
      emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
      DONE;
    }
})
11430
 
11431
;; Per-element arithmetic shift right, V8SI.  AVX2 only; there is no
;; preparation code, so the ashiftrt pattern is generated as written.
(define_expand "vashrv8si3"
  [(set (match_operand:V8SI 0 "register_operand" "")
        (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand" "")
                       (match_operand:V8SI 2 "nonimmediate_operand" "")))]
  "TARGET_AVX2")
11436
 
11437
;; Per-element shift left for the byte/word 128-bit modes (XOP).
;; The operands are forwarded unchanged to xop_sha<mode>3; for
;; non-negative counts that insn's RTL is a left shift.
(define_expand "vashl<mode>3"
  [(set (match_operand:VI12_128 0 "register_operand" "")
        (ashift:VI12_128
          (match_operand:VI12_128 1 "register_operand" "")
          (match_operand:VI12_128 2 "nonimmediate_operand" "")))]
  "TARGET_XOP"
{
  emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
  DONE;
})
11447
 
11448
;; Per-element shift left for the dword/qword 128-bit modes.  With AVX2
;; nothing is emitted here and the ashift pattern itself is generated.
;; Without AVX2 (XOP only) the count vector is forced into a register
;; and xop_sha<mode>3 is emitted.
(define_expand "vashl<mode>3"
  [(set (match_operand:VI48_128 0 "register_operand" "")
        (ashift:VI48_128
          (match_operand:VI48_128 1 "register_operand" "")
          (match_operand:VI48_128 2 "nonimmediate_operand" "")))]
  "TARGET_AVX2 || TARGET_XOP"
{
  if (!TARGET_AVX2)
    {
      operands[2] = force_reg (<MODE>mode, operands[2]);
      emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
      DONE;
    }
})
11462
 
11463
;; Per-element shift left for the dword/qword 256-bit modes.  AVX2
;; only; there is no preparation code, so the ashift pattern is
;; generated as written.
(define_expand "vashl<mode>3"
  [(set (match_operand:VI48_256 0 "register_operand" "")
        (ashift:VI48_256
          (match_operand:VI48_256 1 "register_operand" "")
          (match_operand:VI48_256 2 "nonimmediate_operand" "")))]
  "TARGET_AVX2")
11469
 
11470
;; Per-element variable arithmetic shift.  Where the count in operand 2
;; is >= 0, operand 1 is shifted left by it; otherwise it is shifted
;; right arithmetically by the negated count.  Operands 1 and 2 may not
;; both be memory.
(define_insn "xop_sha<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
        (if_then_else:VI_128
         (ge:VI_128
          (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
          (const_int 0))
         (ashift:VI_128
          (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
          (match_dup 2))
         (ashiftrt:VI_128
          (match_dup 1)
          (neg:VI_128 (match_dup 2)))))]
  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "mode" "TI")])
11488
 
11489
;; Per-element variable logical shift.  Where the count in operand 2 is
;; >= 0, operand 1 is shifted left by it; otherwise it is shifted right
;; logically by the negated count.  Operands 1 and 2 may not both be
;; memory.
(define_insn "xop_shl<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
        (if_then_else:VI_128
         (ge:VI_128
          (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
          (const_int 0))
         (ashift:VI_128
          (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
          (match_dup 2))
         (lshiftrt:VI_128
          (match_dup 1)
          (neg:VI_128 (match_dup 2)))))]
  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "mode" "TI")])
11507
 
11508
;; SSE2 doesn't have some shift variants, so define versions for XOP
;; V16QI shift left by a scalar count: the count (operand 2) is
;; replicated into all 16 lanes of a fresh vector register, then
;; xop_shav16qi3 is emitted with that vector as the count.
(define_expand "ashlv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "")
        (ashift:V16QI
          (match_operand:V16QI 1 "register_operand" "")
          (match_operand:SI 2 "nonmemory_operand" "")))]
  "TARGET_XOP"
{
  rtx reg = gen_reg_rtx (V16QImode);
  rtx par;
  int i;

  /* Build a 16-element PARALLEL with the count in every slot.  */
  par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
  for (i = 0; i < 16; i++)
    XVECEXP (par, 0, i) = operands[2];

  emit_insn (gen_vec_initv16qi (reg, par));
  emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], reg));
  DONE;
})
11528
 
11529
;; V16QI right shifts (logical and arithmetic, via any_shiftrt) by a
;; scalar count.  The XOP variable shifts go right for negative counts,
;; so the count is negated: at expand time when it is a constant,
;; otherwise with a vector negate after the broadcast.  <CODE> selects
;; between the logical (xop_shlv16qi3) and arithmetic (xop_shav16qi3)
;; insn.
(define_expand "<shift_insn>v16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "")
        (any_shiftrt:V16QI
          (match_operand:V16QI 1 "register_operand" "")
          (match_operand:SI 2 "nonmemory_operand" "")))]
  "TARGET_XOP"
{
  rtx reg = gen_reg_rtx (V16QImode);
  rtx par;
  bool negate = false;
  rtx (*shift_insn)(rtx, rtx, rtx);
  int i;

  /* A constant count can be negated right away.  */
  if (CONST_INT_P (operands[2]))
    operands[2] = GEN_INT (-INTVAL (operands[2]));
  else
    negate = true;

  /* Build a 16-element PARALLEL with the count in every slot.  */
  par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
  for (i = 0; i < 16; i++)
    XVECEXP (par, 0, i) = operands[2];

  emit_insn (gen_vec_initv16qi (reg, par));

  if (negate)
    emit_insn (gen_negv16qi2 (reg, reg));

  if (<CODE> == LSHIFTRT)
    shift_insn = gen_xop_shlv16qi3;
  else
    shift_insn = gen_xop_shav16qi3;

  emit_insn (shift_insn (operands[0], operands[1], reg));
  DONE;
})
11564
 
11565
;; V2DI arithmetic shift right by a scalar DImode count.  The count is
;; negated (at expand time if constant, otherwise with a vector negate
;; after the broadcast) and xop_shav2di3 is emitted, whose RTL shifts
;; right arithmetically for negative counts.
(define_expand "ashrv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (ashiftrt:V2DI
          (match_operand:V2DI 1 "register_operand" "")
          (match_operand:DI 2 "nonmemory_operand" "")))]
  "TARGET_XOP"
{
  rtx reg = gen_reg_rtx (V2DImode);
  rtx par;
  bool negate = false;
  int i;

  /* A constant count can be negated right away.  */
  if (CONST_INT_P (operands[2]))
    operands[2] = GEN_INT (-INTVAL (operands[2]));
  else
    negate = true;

  /* Build a 2-element PARALLEL with the count in both slots.  */
  par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
  for (i = 0; i < 2; i++)
    XVECEXP (par, 0, i) = operands[2];

  emit_insn (gen_vec_initv2di (reg, par));

  if (negate)
    emit_insn (gen_negv2di2 (reg, reg));

  emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
  DONE;
})
11594
 
11595
;; XOP FRCZ support
;; The operation is opaque to the RTL optimizers (UNSPEC_FRCZ over
;; operand 1); one insn is generated per mode in FMAMODE.
(define_insn "xop_frcz<mode>2"
  [(set (match_operand:FMAMODE 0 "register_operand" "=x")
        (unspec:FMAMODE
         [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
         UNSPEC_FRCZ))]
  "TARGET_XOP"
  "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt1")
   (set_attr "mode" "<MODE>")])
11605
 
11606
;; scalar insns
;; Expander for the scalar FRCZ form: element 0 of the result comes from
;; the FRCZ unspec, the remaining elements from operand 3, which the
;; preparation code sets to the zero vector of the mode.
(define_expand "xop_vmfrcz<mode>2"
  [(set (match_operand:VF_128 0 "register_operand")
        (vec_merge:VF_128
          (unspec:VF_128
           [(match_operand:VF_128 1 "nonimmediate_operand")]
           UNSPEC_FRCZ)
          (match_dup 3)
          (const_int 1)))]
  "TARGET_XOP"
{
  operands[3] = CONST0_RTX (<MODE>mode);
})
11619
 
11620
;; Matcher for the scalar FRCZ form: a vec_merge with mask 1 of the
;; FRCZ unspec and a zero vector (operand 2 must satisfy
;; const0_operand).
(define_insn "*xop_vmfrcz_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x")
        (vec_merge:VF_128
          (unspec:VF_128
           [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
           UNSPEC_FRCZ)
          (match_operand:VF_128 2 "const0_operand")
          (const_int 1)))]
  "TARGET_XOP"
  "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt1")
   (set_attr "mode" "<MODE>")])
11632
 
11633
;; Signed element-wise integer comparison.  Operand 1 is the comparison
;; operator itself (accepted by ix86_comparison_int_operator); the
;; template prints it through %Y1 as part of the mnemonic.
(define_insn "xop_maskcmp<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x")
        (match_operator:VI_128 1 "ix86_comparison_int_operator"
         [(match_operand:VI_128 2 "register_operand" "x")
          (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
  "TARGET_XOP"
  "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sse4arg")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
11646
 
11647
;; Unsigned element-wise integer comparison.  Operand 1 is the
;; comparison operator (accepted by ix86_comparison_uns_operator); the
;; template prints it through %Y1 followed by the "u" marker.
(define_insn "xop_maskcmp_uns<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x")
        (match_operator:VI_128 1 "ix86_comparison_uns_operator"
         [(match_operand:VI_128 2 "register_operand" "x")
          (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
  "TARGET_XOP"
  "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
11660
 
11661
;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
;; and pcomneu* not to be converted to the signed ones in case somebody needs
;; the exact instruction generated for the intrinsic.
;; The comparison is wrapped in UNSPEC_XOP_UNSIGNED_CMP, which keeps it
;; distinct from the plain unsigned-compare pattern.
(define_insn "xop_maskcmp_uns2<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x")
        (unspec:VI_128
         [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
          [(match_operand:VI_128 2 "register_operand" "x")
           (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
         UNSPEC_XOP_UNSIGNED_CMP))]
  "TARGET_XOP"
  "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
11678
 
11679
;; Pcomtrue and pcomfalse support.  These are useless instructions, but are
;; being added here to be complete.
;; Operand 3 is a constant selector: non-zero emits vpcomtrue, zero
;; emits vpcomfalse.
(define_insn "xop_pcom_tf<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x")
        (unspec:VI_128
          [(match_operand:VI_128 1 "register_operand" "x")
           (match_operand:VI_128 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_int_operand" "n")]
          UNSPEC_XOP_TRUEFALSE))]
  "TARGET_XOP"
{
  return ((INTVAL (operands[3]) != 0)
          ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
          : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
}
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
11699
 
11700
;; Two-source floating-point permute, modelled as UNSPEC_VPERMIL2 over
;; the data operands (1, 2), operand 3 and a 0..3 immediate (operand 4).
;; Operand 3 is given the integer vector mode that corresponds to the
;; float vector mode of the data operands.
(define_insn "xop_vpermil2<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x")
        (unspec:VF
          [(match_operand:VF 1 "register_operand" "x")
           (match_operand:VF 2 "nonimmediate_operand" "%x")
           (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
           (match_operand:SI 4 "const_0_to_3_operand" "n")]
          UNSPEC_VPERMIL2))]
  "TARGET_XOP"
  "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
  [(set_attr "type" "sse4arg")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "<MODE>")])
11713
 
11714
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11715
 
11716
;; AES encryption round, modelled as UNSPEC_AESENC over operands 1
;; and 2.  Alternative 0 (isa noavx) is the two-operand form with
;; operand 1 tied to the destination; alternative 1 (isa avx) is the
;; three-operand VEX form.
(define_insn "aesenc"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
                       (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
                      UNSPEC_AESENC))]
  "TARGET_AES"
  "@
   aesenc\t{%2, %0|%0, %2}
   vaesenc\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
11730
 
11731
;; Final AES encryption round, modelled as UNSPEC_AESENCLAST over
;; operands 1 and 2.  Alternative 0 (isa noavx) ties operand 1 to the
;; destination; alternative 1 (isa avx) is the three-operand VEX form.
(define_insn "aesenclast"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
                       (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
                      UNSPEC_AESENCLAST))]
  "TARGET_AES"
  "@
   aesenclast\t{%2, %0|%0, %2}
   vaesenclast\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
11745
 
11746
;; AES decryption round, modelled as UNSPEC_AESDEC over operands 1
;; and 2.  Alternative 0 (isa noavx) ties operand 1 to the destination;
;; alternative 1 (isa avx) is the three-operand VEX form.
(define_insn "aesdec"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
                       (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
                      UNSPEC_AESDEC))]
  "TARGET_AES"
  "@
   aesdec\t{%2, %0|%0, %2}
   vaesdec\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
11760
 
11761
;; Final AES decryption round, modelled as UNSPEC_AESDECLAST over
;; operands 1 and 2.  Alternative 0 (isa noavx) ties operand 1 to the
;; destination; alternative 1 (isa avx) is the three-operand VEX form.
(define_insn "aesdeclast"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
                       (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
                      UNSPEC_AESDECLAST))]
  "TARGET_AES"
  "@
   aesdeclast\t{%2, %0|%0, %2}
   vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
11775
 
11776
;; AESIMC, modelled as UNSPEC_AESIMC over operand 1.  A single
;; alternative covers both encodings: the "%v" template prefix and the
;; maybe_vex prefix attribute select the VEX form when applicable.
(define_insn "aesimc"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
                      UNSPEC_AESIMC))]
  "TARGET_AES"
  "%vaesimc\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
11786
 
11787
;; AESKEYGENASSIST, modelled as UNSPEC_AESKEYGENASSIST over operand 1
;; and an 8-bit immediate (operand 2, range 0..255).  The "%v" template
;; prefix and maybe_vex attribute select the VEX form when applicable.
(define_insn "aeskeygenassist"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
                      (match_operand:SI 2 "const_0_to_255_operand" "n")]
                     UNSPEC_AESKEYGENASSIST))]
  "TARGET_AES"
  "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
11799
 
11800
;; PCLMULQDQ, modelled as UNSPEC_PCLMUL over operands 1, 2 and an 8-bit
;; immediate (operand 3, range 0..255).  Alternative 0 (isa noavx) ties
;; operand 1 to the destination; alternative 1 (isa avx) is the VEX
;; form with a separate destination.
(define_insn "pclmulqdq"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
                      (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
                      (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
                     UNSPEC_PCLMUL))]
  "TARGET_PCLMUL"
  "@
   pclmulqdq\t{%3, %2, %0|%0, %2, %3}
   vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
11816
 
11817
;; Expander for vzeroall.  Builds a PARALLEL whose first element is the
;; UNSPECV_VZEROALL marker and whose remaining elements set each SSE
;; register (16 in 64-bit mode, 8 otherwise), viewed as V8SI, to zero.
(define_expand "avx_vzeroall"
  [(match_par_dup 0 [(const_int 0)])]
  "TARGET_AVX"
{
  int nregs = TARGET_64BIT ? 16 : 8;
  int regno;

  /* One slot for the unspec_volatile plus one SET per register.  */
  operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));

  XVECEXP (operands[0], 0, 0)
    = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
                               UNSPECV_VZEROALL);

  for (regno = 0; regno < nregs; regno++)
    XVECEXP (operands[0], 0, regno + 1)
      = gen_rtx_SET (VOIDmode,
                     gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
                     CONST0_RTX (V8SImode));
})
11836
 
11837
;; Matcher for vzeroall: a PARALLEL accepted by the vzeroall_operation
;; predicate whose first element is the UNSPECV_VZEROALL
;; unspec_volatile.
(define_insn "*avx_vzeroall"
  [(match_parallel 0 "vzeroall_operation"
    [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
  "TARGET_AVX"
  "vzeroall"
  [(set_attr "type" "sse")
   (set_attr "modrm" "0")
   (set_attr "memory" "none")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
11847
 
11848
;; Clear the upper 128bits of AVX registers, equivalent to a NOP
;; if the upper 128bits are unused.
;; Modelled as an unspec_volatile (UNSPECV_VZEROUPPER) carrying one
;; constant-integer operand, which the output template does not print.
(define_insn "avx_vzeroupper"
  [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
                    UNSPECV_VZEROUPPER)]
  "TARGET_AVX"
  "vzeroupper"
  [(set_attr "type" "sse")
   (set_attr "modrm" "0")
   (set_attr "memory" "none")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
11860
 
11861
;; Maps each integer vector mode to the 128-bit integer vector mode with
;; the same element type; 128-bit modes map to themselves.
(define_mode_attr AVXTOSSEMODE
  [(V4DI "V2DI") (V2DI "V2DI")
   (V8SI "V4SI") (V4SI "V4SI")
   (V16HI "V8HI") (V8HI "V8HI")
   (V32QI "V16QI") (V16QI "V16QI")])
11866
 
11867
;; Broadcast element 0 of the 128-bit source (operand 1, whose mode is
;; the AVXTOSSEMODE counterpart of the destination mode) to every
;; element of the destination.
(define_insn "avx2_pbroadcast<mode>"
  [(set (match_operand:VI 0 "register_operand" "=x")
        (vec_duplicate:VI
          (vec_select:<ssescalarmode>
            (match_operand:<AVXTOSSEMODE> 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "TARGET_AVX2"
  "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
11879
 
11880
;; V8SI permute with both inputs in vector operands, modelled as
;; UNSPEC_VPERMSI and emitted as vpermd.
(define_insn "avx2_permvarv8si"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (unspec:V8SI
          [(match_operand:V8SI 1 "register_operand" "x")
           (match_operand:V8SI 2 "nonimmediate_operand" "xm")]
          UNSPEC_VPERMSI))]
  "TARGET_AVX2"
  "vpermd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
11891
 
11892
;; V4DF permute controlled by an 8-bit immediate (operand 2), modelled
;; as UNSPEC_VPERMDF and emitted as vpermpd.
;; Operand 1 uses nonimmediate_operand so that the predicate agrees
;; with its "xm" constraint; a register_operand predicate there would
;; reject the memory form that the constraint allows.
(define_insn "avx2_permv4df"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (unspec:V4DF
          [(match_operand:V4DF 1 "nonimmediate_operand" "xm")
           (match_operand:SI 2 "const_0_to_255_operand" "n")]
          UNSPEC_VPERMDF))]
  "TARGET_AVX2"
  "vpermpd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
11904
 
11905
;; V8SF permute with both inputs in vector operands, modelled as
;; UNSPEC_VPERMSF and emitted as vpermps.
(define_insn "avx2_permvarv8sf"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (unspec:V8SF
          [(match_operand:V8SF 1 "register_operand" "x")
           (match_operand:V8SF 2 "nonimmediate_operand" "xm")]
          UNSPEC_VPERMSF))]
  "TARGET_AVX2"
  "vpermps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
11916
 
11917
;; V4DI permute by an 8-bit immediate.  The immediate is split into
;; four 2-bit element selectors (bits 1:0, 3:2, 5:4, 7:6), which are
;; passed to avx2_permv4di_1 so the permute is expressed as a
;; vec_select.
(define_expand "avx2_permv4di"
  [(match_operand:V4DI 0 "register_operand" "")
   (match_operand:V4DI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_0_to_255_operand" "")]
  "TARGET_AVX2"
{
  int mask = INTVAL (operands[2]);
  emit_insn (gen_avx2_permv4di_1 (operands[0], operands[1],
                                  GEN_INT ((mask >> 0) & 3),
                                  GEN_INT ((mask >> 2) & 3),
                                  GEN_INT ((mask >> 4) & 3),
                                  GEN_INT ((mask >> 6) & 3)));
  DONE;
})
11931
 
11932
;; V4DI permute expressed as a vec_select with four constant element
;; selectors (operands 2..5, each 0..3).  The output code packs the
;; selectors back into one 8-bit immediate, two bits per element, and
;; overwrites operands[2] with it for printing.
(define_insn "avx2_permv4di_1"
  [(set (match_operand:V4DI 0 "register_operand" "=x")
        (vec_select:V4DI
          (match_operand:V4DI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")])))]
  "TARGET_AVX2"
{
  int mask = 0;
  mask |= INTVAL (operands[2]) << 0;
  mask |= INTVAL (operands[3]) << 2;
  mask |= INTVAL (operands[4]) << 4;
  mask |= INTVAL (operands[5]) << 6;
  operands[2] = GEN_INT (mask);
  return "vpermq\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
11953
 
11954
;; Two-source permute controlled by an 8-bit immediate (operand 3),
;; modelled as UNSPEC_VPERMTI over V4DI operands and emitted as
;; vperm2i128.
(define_insn "avx2_permv2ti"
  [(set (match_operand:V4DI 0 "register_operand" "=x")
        (unspec:V4DI
          [(match_operand:V4DI 1 "register_operand" "x")
           (match_operand:V4DI 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_VPERMTI))]
  "TARGET_AVX2"
  "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
11966
 
11967
;; Broadcast element 0 of a V2DF register (operand 1) to all four
;; elements of the V4DF destination; emitted as vbroadcastsd.
(define_insn "avx2_vec_dupv4df"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (vec_duplicate:V4DF
          (vec_select:DF
            (match_operand:V2DF 1 "register_operand" "x")
            (parallel [(const_int 0)]))))]
  "TARGET_AVX2"
  "vbroadcastsd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])
11978
 
11979
;; Modes handled by AVX vec_dup patterns.
(define_mode_iterator AVX_VEC_DUP_MODE
  [V8SI V8SF V4DI V4DF])
11982
 
11983
;; Duplicate a scalar (operand 1, in the element mode of the vector)
;; into every element of a 256-bit vector.  Alternative 0 takes the
;; scalar from memory and emits a broadcast.  Alternative 1 takes it
;; from a register and outputs "#", i.e. the insn must be split.
(define_insn "vec_dup<mode>"
  [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x")
        (vec_duplicate:AVX_VEC_DUP_MODE
          (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,?x")))]
  "TARGET_AVX"
  "@
   vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
   #"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
11995
 
11996
;; Broadcast a 128-bit memory operand (operand 1, the half-width vector
;; mode) into both halves of a 256-bit integer vector, expressed as a
;; vec_concat of the operand with itself.
(define_insn "avx2_vbroadcasti128_<mode>"
  [(set (match_operand:VI_256 0 "register_operand" "=x")
        (vec_concat:VI_256
          (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
          (match_dup 1)))]
  "TARGET_AVX2"
  "vbroadcasti128\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
12007
 
12008
;; After reload, split a 256-bit vec_duplicate of a register scalar in
;; two steps: duplicate the scalar into the half-width vector (operand
;; 2), then concatenate that half with itself.  Operand 2 is created in
;; the preparation code as the half-width view of the destination's
;; own hard register.
(define_split
  [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "")
        (vec_duplicate:AVX_VEC_DUP_MODE
          (match_operand:<ssescalarmode> 1 "register_operand" "")))]
  "TARGET_AVX && reload_completed"
  [(set (match_dup 2)
        (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
   (set (match_dup 0)
        (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
  "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
12018
 
12019
;; Replicate a 128-bit value (operand 1, the half-width vector mode)
;; into both halves of a 256-bit vector.  Three alternatives:
;;   0: source in memory -> broadcast;
;;   1: source tied to the destination register -> insert with
;;      immediate 1;
;;   2: source in another register -> two-source permute with
;;      immediate 0.
(define_insn "avx_vbroadcastf128_<mode>"
  [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
        (vec_concat:V_256
          (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
          (match_dup 1)))]
  "TARGET_AVX"
  "@
   vbroadcast<i128>\t{%1, %0|%0, %1}
   vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
   vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
  [(set_attr "type" "ssemov,sselog1,sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "0,1,1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
12034
 
12035
;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
12036
;; If it so happens that the input is in memory, use vbroadcast.
12037
;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
12038
;; V4SF broadcast written as a vec_select whose PARALLEL satisfies
;; avx_vbroadcast_operand; operand 3 is the selected element index.
;; Memory alternatives (0 and 1) re-address operand 1 to the chosen SFmode
;; element (byte offset elt * 4) and emit vbroadcastss.  The register
;; alternative emits vpermilps with immediate elt * 0x55, which repeats the
;; 2-bit element index in all four selector fields.
(define_insn "*avx_vperm_broadcast_v4sf"
12039
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
12040
        (vec_select:V4SF
12041
          (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
12042
          (match_parallel 2 "avx_vbroadcast_operand"
12043
            [(match_operand 3 "const_int_operand" "C,n,n")])))]
12044
  "TARGET_AVX"
12045
{
12046
  int elt = INTVAL (operands[3]);
12047
  switch (which_alternative)
12048
    {
12049
    case 0:
12050
    case 1:
12051
      operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
12052
      return "vbroadcastss\t{%1, %0|%0, %1}";
12053
    case 2:
12054
      operands[2] = GEN_INT (elt * 0x55);
12055
      return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
12056
    default:
12057
      gcc_unreachable ();
12058
    }
12059
}
12060
  [(set_attr "type" "ssemov,ssemov,sselog1")
12061
   (set_attr "prefix_extra" "1")
12062
   (set_attr "length_immediate" "0,0,1")
12063
   (set_attr "prefix" "vex")
12064
   (set_attr "mode" "SF,SF,V4SF")])
12065
 
12066
;; VF_256 broadcast written as a vec_select.  Always output as "#" and
;; split once reload has completed:
;;  - register source: emit a vpermil that replicates the wanted element
;;    inside its 128-bit lane, then a vperm2f128 of the result with itself
;;    that copies that lane to both halves, and finish with DONE;
;;  - memory source: re-address operand 1 to the selected element and fall
;;    through to the vec_duplicate replacement template.
(define_insn_and_split "*avx_vperm_broadcast_"
12067
  [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
12068
        (vec_select:VF_256
12069
          (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
12070
          (match_parallel 2 "avx_vbroadcast_operand"
12071
            [(match_operand 3 "const_int_operand" "C,n,n")])))]
12072
  "TARGET_AVX"
12073
  "#"
12074
  "&& reload_completed"
12075
  [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
12076
{
12077
  rtx op0 = operands[0], op1 = operands[1];
12078
  int elt = INTVAL (operands[3]);
12079
 
12080
  if (REG_P (op1))
12081
    {
12082
      int mask;
12083
 
12084
      /* Shuffle element we care about into all elements of the 128-bit lane.
12085
         The other lane gets shuffled too, but we don't care.  */
12086
      if (mode == V4DFmode)
12087
        mask = (elt & 1 ? 15 : 0);
12088
      else
12089
        mask = (elt & 3) * 0x55;
12090
      emit_insn (gen_avx_vpermil (op0, op1, GEN_INT (mask)));
12091
 
12092
      /* Shuffle the lane we care about into both lanes of the dest.  */
12093
      mask = (elt / ( / 2)) * 0x11;
12094
      emit_insn (gen_avx_vperm2f1283 (op0, op0, op0, GEN_INT (mask)));
12095
      DONE;
12096
    }
12097
 
12098
  operands[1] = adjust_address_nv (op1, mode,
12099
                                   elt * GET_MODE_SIZE (mode));
12100
})
12101
 
12102
;; Expander for the immediate form of vpermil on the VF2 modes.  Converts
;; the imm8 in operand 2 into the PARALLEL selector used by the vec_select
;; template: one immediate bit per element.  Elements 2 and 3 are only
;; filled in for V4DFmode and have 2 added to their selected index.
(define_expand "avx_vpermil"
12103
  [(set (match_operand:VF2 0 "register_operand" "")
12104
        (vec_select:VF2
12105
          (match_operand:VF2 1 "nonimmediate_operand" "")
12106
          (match_operand:SI 2 "const_0_to_255_operand" "")))]
12107
  "TARGET_AVX"
12108
{
12109
  int mask = INTVAL (operands[2]);
12110
  rtx perm[];
12111
 
12112
  perm[0] = GEN_INT (mask & 1);
12113
  perm[1] = GEN_INT ((mask >> 1) & 1);
12114
  if (mode == V4DFmode)
12115
    {
12116
      perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
12117
      perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
12118
    }
12119
 
12120
  operands[2]
12121
    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (, perm));
12122
})
12123
 
12124
;; Expander for the immediate form of vpermil on the VF1 modes.  Converts
;; the imm8 in operand 2 into a PARALLEL selector: four 2-bit fields give
;; the indices for elements 0..3.  For V8SFmode the same four fields are
;; reused for elements 4..7 with 4 added to each index.
(define_expand "avx_vpermil"
12125
  [(set (match_operand:VF1 0 "register_operand" "")
12126
        (vec_select:VF1
12127
          (match_operand:VF1 1 "nonimmediate_operand" "")
12128
          (match_operand:SI 2 "const_0_to_255_operand" "")))]
12129
  "TARGET_AVX"
12130
{
12131
  int mask = INTVAL (operands[2]);
12132
  rtx perm[];
12133
 
12134
  perm[0] = GEN_INT (mask & 3);
12135
  perm[1] = GEN_INT ((mask >> 2) & 3);
12136
  perm[2] = GEN_INT ((mask >> 4) & 3);
12137
  perm[3] = GEN_INT ((mask >> 6) & 3);
12138
  if (mode == V8SFmode)
12139
    {
12140
      perm[4] = GEN_INT ((mask & 3) + 4);
12141
      perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
12142
      perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
12143
      perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
12144
    }
12145
 
12146
  operands[2]
12147
    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (, perm));
12148
})
12149
 
12150
;; vpermil with a constant selector given as a PARALLEL.  The insn condition
;; only accepts selectors for which avx_vpermilp_parallel returns nonzero;
;; the output code calls it again and uses (return value - 1) as the
;; immediate printed for operand 2.
(define_insn "*avx_vpermilp"
12151
  [(set (match_operand:VF 0 "register_operand" "=x")
12152
        (vec_select:VF
12153
          (match_operand:VF 1 "nonimmediate_operand" "xm")
12154
          (match_parallel 2 ""
12155
            [(match_operand 3 "const_int_operand" "")])))]
12156
  "TARGET_AVX
12157
   && avx_vpermilp_parallel (operands[2], mode)"
12158
{
12159
  int mask = avx_vpermilp_parallel (operands[2], mode) - 1;
12160
  operands[2] = GEN_INT (mask);
12161
  return "vpermil\t{%2, %1, %0|%0, %1, %2}";
12162
}
12163
  [(set_attr "type" "sselog")
12164
   (set_attr "prefix_extra" "1")
12165
   (set_attr "length_immediate" "1")
12166
   (set_attr "prefix" "vex")
12167
   (set_attr "mode" "")])
12168
 
12169
;; vpermil with a variable control: operand 1 is the data register and
;; operand 2 (register or memory) is the control.  Modelled as
;; UNSPEC_VPERMIL rather than as a vec_select.
(define_insn "avx_vpermilvar3"
12170
  [(set (match_operand:VF 0 "register_operand" "=x")
12171
        (unspec:VF
12172
          [(match_operand:VF 1 "register_operand" "x")
12173
           (match_operand: 2 "nonimmediate_operand" "xm")]
12174
          UNSPEC_VPERMIL))]
12175
  "TARGET_AVX"
12176
  "vpermil\t{%2, %1, %0|%0, %1, %2}"
12177
  [(set_attr "type" "sselog")
12178
   (set_attr "prefix_extra" "1")
12179
   (set_attr "prefix" "vex")
12180
   (set_attr "mode" "")])
12181
 
12182
;; Expander for vperm2f128 with an imm8 in operand 3.
;; If neither bit 3 nor bit 7 of the immediate is set ((mask & 0x88) == 0),
;; the operation is emitted as a vec_select over the vec_concat of operands
;; 1 and 2: immediate bits 1:0 choose which half-vector feeds the low half
;; of the result and bits 5:4 choose the one for the high half.  Otherwise
;; the expander falls through and the UNSPEC_VPERMIL2F128 template is used.
(define_expand "avx_vperm2f1283"
12183
  [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
12184
        (unspec:AVX256MODE2P
12185
          [(match_operand:AVX256MODE2P 1 "register_operand" "")
12186
           (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
12187
           (match_operand:SI 3 "const_0_to_255_operand" "")]
12188
          UNSPEC_VPERMIL2F128))]
12189
  "TARGET_AVX"
12190
{
12191
  int mask = INTVAL (operands[3]);
12192
  if ((mask & 0x88) == 0)
12193
    {
12194
      rtx perm[], t1, t2;
12195
      int i, base, nelt = , nelt2 = nelt / 2;
12196
 
12197
      base = (mask & 3) * nelt2;
12198
      for (i = 0; i < nelt2; ++i)
12199
        perm[i] = GEN_INT (base + i);
12200
 
12201
      base = ((mask >> 4) & 3) * nelt2;
12202
      for (i = 0; i < nelt2; ++i)
12203
        perm[i + nelt2] = GEN_INT (base + i);
12204
 
12205
      t2 = gen_rtx_VEC_CONCAT (mode,
12206
                               operands[1], operands[2]);
12207
      t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
12208
      t2 = gen_rtx_VEC_SELECT (mode, t2, t1);
12209
      t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
12210
      emit_insn (t2);
12211
      DONE;
12212
    }
12213
})
12214
 
12215
;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
12216
;; means that in order to represent this properly in rtl we'd have to
12217
;; nest *another* vec_concat with a zero operand and do the select from
12218
;; a 4x wide vector.  That doesn't seem very nice.
12219
;; vperm2 with an arbitrary imm8 (operand 3), kept as UNSPEC_VPERMIL2F128.
(define_insn "*avx_vperm2f128_full"
12220
  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12221
        (unspec:AVX256MODE2P
12222
          [(match_operand:AVX256MODE2P 1 "register_operand" "x")
12223
           (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
12224
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
12225
          UNSPEC_VPERMIL2F128))]
12226
  "TARGET_AVX"
12227
  "vperm2\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12228
  [(set_attr "type" "sselog")
12229
   (set_attr "prefix_extra" "1")
12230
   (set_attr "length_immediate" "1")
12231
   (set_attr "prefix" "vex")
12232
   (set_attr "mode" "")])
12233
 
12234
;; vperm2 expressed as a vec_select from the vec_concat of operands 1 and 2.
;; Only matches when avx_vperm2f128_parallel returns nonzero for the
;; selector; (return value - 1) is the imm8.  Two immediates are emitted as
;; a vinsert of the low half of operand 2 (%x2) instead: 0x12 with insert
;; position 0 and 0x20 with insert position 1.
(define_insn "*avx_vperm2f128_nozero"
12235
  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12236
        (vec_select:AVX256MODE2P
12237
          (vec_concat:
12238
            (match_operand:AVX256MODE2P 1 "register_operand" "x")
12239
            (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
12240
          (match_parallel 3 ""
12241
            [(match_operand 4 "const_int_operand" "")])))]
12242
  "TARGET_AVX
12243
   && avx_vperm2f128_parallel (operands[3], mode)"
12244
{
12245
  int mask = avx_vperm2f128_parallel (operands[3], mode) - 1;
12246
  if (mask == 0x12)
12247
    return "vinsert\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
12248
  if (mask == 0x20)
12249
    return "vinsert\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
12250
  operands[3] = GEN_INT (mask);
12251
  return "vperm2\t{%3, %2, %1, %0|%0, %1, %2, %3}";
12252
}
12253
  [(set_attr "type" "sselog")
12254
   (set_attr "prefix_extra" "1")
12255
   (set_attr "length_immediate" "1")
12256
   (set_attr "prefix" "vex")
12257
   (set_attr "mode" "")])
12258
 
12259
;; Expander that inserts operand 2 into one half of operand 1, writing the
;; result to operand 0.  Operand 3 (0 or 1) selects the generator:
;; 0 -> gen_vec_set_lo_, 1 -> gen_vec_set_hi_.  Any other value is
;; unreachable because the predicate is const_0_to_1_operand.
(define_expand "avx_vinsertf128"
12260
  [(match_operand:V_256 0 "register_operand" "")
12261
   (match_operand:V_256 1 "register_operand" "")
12262
   (match_operand: 2 "nonimmediate_operand" "")
12263
   (match_operand:SI 3 "const_0_to_1_operand" "")]
12264
  "TARGET_AVX"
12265
{
12266
  rtx (*insn)(rtx, rtx, rtx);
12267
 
12268
  switch (INTVAL (operands[3]))
12269
    {
12270
    case 0:
12271
      insn = gen_vec_set_lo_;
12272
      break;
12273
    case 1:
12274
      insn = gen_vec_set_hi_;
12275
      break;
12276
    default:
12277
      gcc_unreachable ();
12278
    }
12279
 
12280
  emit_insn (insn (operands[0], operands[1], operands[2]));
12281
  DONE;
12282
})
12283
 
12284
;; AVX2: replace the low half of a V4DI.  Result is operand 2 (V2DI)
;; concatenated with elements 2 and 3 of operand 1; vinserti128 position 0.
(define_insn "avx2_vec_set_lo_v4di"
12285
  [(set (match_operand:V4DI 0 "register_operand" "=x")
12286
        (vec_concat:V4DI
12287
          (match_operand:V2DI 2 "nonimmediate_operand" "xm")
12288
          (vec_select:V2DI
12289
            (match_operand:V4DI 1 "register_operand" "x")
12290
            (parallel [(const_int 2) (const_int 3)]))))]
12291
  "TARGET_AVX2"
12292
  "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12293
  [(set_attr "type" "sselog")
12294
   (set_attr "prefix_extra" "1")
12295
   (set_attr "length_immediate" "1")
12296
   (set_attr "prefix" "vex")
12297
   (set_attr "mode" "OI")])
12298
 
12299
;; AVX2: replace the high half of a V4DI.  Result is elements 0 and 1 of
;; operand 1 concatenated with operand 2 (V2DI); vinserti128 position 1.
(define_insn "avx2_vec_set_hi_v4di"
12300
  [(set (match_operand:V4DI 0 "register_operand" "=x")
12301
        (vec_concat:V4DI
12302
          (vec_select:V2DI
12303
            (match_operand:V4DI 1 "register_operand" "x")
12304
            (parallel [(const_int 0) (const_int 1)]))
12305
          (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
12306
  "TARGET_AVX2"
12307
  "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12308
  [(set_attr "type" "sselog")
12309
   (set_attr "prefix_extra" "1")
12310
   (set_attr "length_immediate" "1")
12311
   (set_attr "prefix" "vex")
12312
   (set_attr "mode" "OI")])
12313
 
12314
;; VI8F_256 modes: replace the low half.  Result is operand 2 concatenated
;; with elements 2 and 3 of operand 1; emitted as a vinsert at position 0.
(define_insn "vec_set_lo_"
12315
  [(set (match_operand:VI8F_256 0 "register_operand" "=x")
12316
        (vec_concat:VI8F_256
12317
          (match_operand: 2 "nonimmediate_operand" "xm")
12318
          (vec_select:
12319
            (match_operand:VI8F_256 1 "register_operand" "x")
12320
            (parallel [(const_int 2) (const_int 3)]))))]
12321
  "TARGET_AVX"
12322
  "vinsert\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12323
  [(set_attr "type" "sselog")
12324
   (set_attr "prefix_extra" "1")
12325
   (set_attr "length_immediate" "1")
12326
   (set_attr "prefix" "vex")
12327
   (set_attr "mode" "")])
12328
 
12329
;; VI8F_256 modes: replace the high half.  Result is elements 0 and 1 of
;; operand 1 concatenated with operand 2; emitted as a vinsert at position 1.
(define_insn "vec_set_hi_"
12330
  [(set (match_operand:VI8F_256 0 "register_operand" "=x")
12331
        (vec_concat:VI8F_256
12332
          (vec_select:
12333
            (match_operand:VI8F_256 1 "register_operand" "x")
12334
            (parallel [(const_int 0) (const_int 1)]))
12335
          (match_operand: 2 "nonimmediate_operand" "xm")))]
12336
  "TARGET_AVX"
12337
  "vinsert\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12338
  [(set_attr "type" "sselog")
12339
   (set_attr "prefix_extra" "1")
12340
   (set_attr "length_immediate" "1")
12341
   (set_attr "prefix" "vex")
12342
   (set_attr "mode" "")])
12343
 
12344
;; VI4F_256 modes: replace the low half.  Result is operand 2 concatenated
;; with elements 4..7 of operand 1; emitted as a vinsert at position 0.
(define_insn "vec_set_lo_"
12345
  [(set (match_operand:VI4F_256 0 "register_operand" "=x")
12346
        (vec_concat:VI4F_256
12347
          (match_operand: 2 "nonimmediate_operand" "xm")
12348
          (vec_select:
12349
            (match_operand:VI4F_256 1 "register_operand" "x")
12350
            (parallel [(const_int 4) (const_int 5)
12351
                       (const_int 6) (const_int 7)]))))]
12352
  "TARGET_AVX"
12353
  "vinsert\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12354
  [(set_attr "type" "sselog")
12355
   (set_attr "prefix_extra" "1")
12356
   (set_attr "length_immediate" "1")
12357
   (set_attr "prefix" "vex")
12358
   (set_attr "mode" "")])
12359
 
12360
;; VI4F_256 modes: replace the high half.  Result is elements 0..3 of
;; operand 1 concatenated with operand 2; emitted as a vinsert at position 1.
(define_insn "vec_set_hi_"
12361
  [(set (match_operand:VI4F_256 0 "register_operand" "=x")
12362
        (vec_concat:VI4F_256
12363
          (vec_select:
12364
            (match_operand:VI4F_256 1 "register_operand" "x")
12365
            (parallel [(const_int 0) (const_int 1)
12366
                       (const_int 2) (const_int 3)]))
12367
          (match_operand: 2 "nonimmediate_operand" "xm")))]
12368
  "TARGET_AVX"
12369
  "vinsert\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12370
  [(set_attr "type" "sselog")
12371
   (set_attr "prefix_extra" "1")
12372
   (set_attr "length_immediate" "1")
12373
   (set_attr "prefix" "vex")
12374
   (set_attr "mode" "")])
12375
 
12376
;; V16HI: replace the low half.  Result is operand 2 (V8HI) concatenated
;; with elements 8..15 of operand 1; 128-bit insert at position 0.
(define_insn "vec_set_lo_v16hi"
12377
  [(set (match_operand:V16HI 0 "register_operand" "=x")
12378
        (vec_concat:V16HI
12379
          (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12380
          (vec_select:V8HI
12381
            (match_operand:V16HI 1 "register_operand" "x")
12382
            (parallel [(const_int 8) (const_int 9)
12383
                       (const_int 10) (const_int 11)
12384
                       (const_int 12) (const_int 13)
12385
                       (const_int 14) (const_int 15)]))))]
12386
  "TARGET_AVX"
12387
  "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12388
  [(set_attr "type" "sselog")
12389
   (set_attr "prefix_extra" "1")
12390
   (set_attr "length_immediate" "1")
12391
   (set_attr "prefix" "vex")
12392
   (set_attr "mode" "OI")])
12393
 
12394
;; V16HI: replace the high half.  Result is elements 0..7 of operand 1
;; concatenated with operand 2 (V8HI); 128-bit insert at position 1.
(define_insn "vec_set_hi_v16hi"
12395
  [(set (match_operand:V16HI 0 "register_operand" "=x")
12396
        (vec_concat:V16HI
12397
          (vec_select:V8HI
12398
            (match_operand:V16HI 1 "register_operand" "x")
12399
            (parallel [(const_int 0) (const_int 1)
12400
                       (const_int 2) (const_int 3)
12401
                       (const_int 4) (const_int 5)
12402
                       (const_int 6) (const_int 7)]))
12403
          (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
12404
  "TARGET_AVX"
12405
  "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12406
  [(set_attr "type" "sselog")
12407
   (set_attr "prefix_extra" "1")
12408
   (set_attr "length_immediate" "1")
12409
   (set_attr "prefix" "vex")
12410
   (set_attr "mode" "OI")])
12411
 
12412
;; V32QI: replace the low half.  Result is operand 2 (V16QI) concatenated
;; with elements 16..31 of operand 1; 128-bit insert at position 0.
(define_insn "vec_set_lo_v32qi"
12413
  [(set (match_operand:V32QI 0 "register_operand" "=x")
12414
        (vec_concat:V32QI
12415
          (match_operand:V16QI 2 "nonimmediate_operand" "xm")
12416
          (vec_select:V16QI
12417
            (match_operand:V32QI 1 "register_operand" "x")
12418
            (parallel [(const_int 16) (const_int 17)
12419
                       (const_int 18) (const_int 19)
12420
                       (const_int 20) (const_int 21)
12421
                       (const_int 22) (const_int 23)
12422
                       (const_int 24) (const_int 25)
12423
                       (const_int 26) (const_int 27)
12424
                       (const_int 28) (const_int 29)
12425
                       (const_int 30) (const_int 31)]))))]
12426
  "TARGET_AVX"
12427
  "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12428
  [(set_attr "type" "sselog")
12429
   (set_attr "prefix_extra" "1")
12430
   (set_attr "length_immediate" "1")
12431
   (set_attr "prefix" "vex")
12432
   (set_attr "mode" "OI")])
12433
 
12434
;; V32QI: replace the high half.  Result is elements 0..15 of operand 1
;; concatenated with operand 2 (V16QI); 128-bit insert at position 1.
(define_insn "vec_set_hi_v32qi"
12435
  [(set (match_operand:V32QI 0 "register_operand" "=x")
12436
        (vec_concat:V32QI
12437
          (vec_select:V16QI
12438
            (match_operand:V32QI 1 "register_operand" "x")
12439
            (parallel [(const_int 0) (const_int 1)
12440
                       (const_int 2) (const_int 3)
12441
                       (const_int 4) (const_int 5)
12442
                       (const_int 6) (const_int 7)
12443
                       (const_int 8) (const_int 9)
12444
                       (const_int 10) (const_int 11)
12445
                       (const_int 12) (const_int 13)
12446
                       (const_int 14) (const_int 15)]))
12447
          (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
12448
  "TARGET_AVX"
12449
  "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12450
  [(set_attr "type" "sselog")
12451
   (set_attr "prefix_extra" "1")
12452
   (set_attr "length_immediate" "1")
12453
   (set_attr "prefix" "vex")
12454
   (set_attr "mode" "OI")])
12455
 
12456
;; Masked vector load, modelled as UNSPEC_MASKMOV.  Operand 1 is the memory
;; source and operand 2 is the mask register; note that the mask is listed
;; first inside the unspec vector.
(define_insn "_maskload"
12457
  [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
12458
        (unspec:V48_AVX2
12459
          [(match_operand: 2 "register_operand" "x")
12460
           (match_operand:V48_AVX2 1 "memory_operand" "m")]
12461
          UNSPEC_MASKMOV))]
12462
  "TARGET_AVX"
12463
  "vmaskmov\t{%1, %2, %0|%0, %2, %1}"
12464
  [(set_attr "type" "sselog1")
12465
   (set_attr "prefix_extra" "1")
12466
   (set_attr "prefix" "vex")
12467
   (set_attr "mode" "")])
12468
 
12469
;; Masked vector store, modelled as UNSPEC_MASKMOV.  Operand 1 is the mask
;; register and operand 2 the data register.  The destination memory also
;; appears as an input (match_dup 0), so its previous contents are treated
;; as a source of the operation.
(define_insn "_maskstore"
12470
  [(set (match_operand:V48_AVX2 0 "memory_operand" "=m")
12471
        (unspec:V48_AVX2
12472
          [(match_operand: 1 "register_operand" "x")
12473
           (match_operand:V48_AVX2 2 "register_operand" "x")
12474
           (match_dup 0)]
12475
          UNSPEC_MASKMOV))]
12476
  "TARGET_AVX"
12477
  "vmaskmov\t{%2, %1, %0|%0, %1, %2}"
12478
  [(set_attr "type" "sselog1")
12479
   (set_attr "prefix_extra" "1")
12480
   (set_attr "prefix" "vex")
12481
   (set_attr "mode" "")])
12482
 
12483
;; Cast pattern (UNSPEC_CAST) into an AVX256MODE2P destination.  Always
;; output as "#" and split after reload into a single emit_move_insn.
;; Whichever side is used to fix up the modes is rebuilt with gen_rtx_REG on
;; the same hard register number: the destination when it is a register,
;; otherwise the source.
(define_insn_and_split "avx__"
12484
  [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
12485
        (unspec:AVX256MODE2P
12486
          [(match_operand: 1 "nonimmediate_operand" "xm,x")]
12487
          UNSPEC_CAST))]
12488
  "TARGET_AVX"
12489
  "#"
12490
  "&& reload_completed"
12491
  [(const_int 0)]
12492
{
12493
  rtx op0 = operands[0];
12494
  rtx op1 = operands[1];
12495
  if (REG_P (op0))
12496
    op0 = gen_rtx_REG (mode, REGNO (op0));
12497
  else
12498
    op1 = gen_rtx_REG (mode, REGNO (op1));
12499
  emit_move_insn (op0, op1);
12500
  DONE;
12501
})
12502
 
12503
;; Vector initialisation for the V_256 modes: delegates entirely to
;; ix86_expand_vector_init (first argument false) and emits nothing else.
(define_expand "vec_init"
12504
  [(match_operand:V_256 0 "register_operand" "")
12505
   (match_operand 1 "" "")]
12506
  "TARGET_AVX"
12507
{
12508
  ix86_expand_vector_init (false, operands[0], operands[1]);
12509
  DONE;
12510
})
12511
 
12512
;; AVX2 expander: extract one V2DI half of V4DI operand 1 into operand 0.
;; Operand 2 (0 or 1) selects gen_vec_extract_lo_v4di or
;; gen_vec_extract_hi_v4di respectively.
(define_expand "avx2_extracti128"
12513
  [(match_operand:V2DI 0 "nonimmediate_operand" "")
12514
   (match_operand:V4DI 1 "register_operand" "")
12515
   (match_operand:SI 2 "const_0_to_1_operand" "")]
12516
  "TARGET_AVX2"
12517
{
12518
  rtx (*insn)(rtx, rtx);
12519
 
12520
  switch (INTVAL (operands[2]))
12521
    {
12522
    case 0:
12523
      insn = gen_vec_extract_lo_v4di;
12524
      break;
12525
    case 1:
12526
      insn = gen_vec_extract_hi_v4di;
12527
      break;
12528
    default:
12529
      gcc_unreachable ();
12530
    }
12531
 
12532
  emit_insn (insn (operands[0], operands[1]));
12533
  DONE;
12534
})
12535
 
12536
;; AVX2 expander: insert V2DI operand 2 into one half of V4DI operand 1,
;; result in operand 0.  Operand 3 (0 or 1) selects
;; gen_avx2_vec_set_lo_v4di or gen_avx2_vec_set_hi_v4di respectively.
(define_expand "avx2_inserti128"
12537
  [(match_operand:V4DI 0 "register_operand" "")
12538
   (match_operand:V4DI 1 "register_operand" "")
12539
   (match_operand:V2DI 2 "nonimmediate_operand" "")
12540
   (match_operand:SI 3 "const_0_to_1_operand" "")]
12541
  "TARGET_AVX2"
12542
{
12543
  rtx (*insn)(rtx, rtx, rtx);
12544
 
12545
  switch (INTVAL (operands[3]))
12546
    {
12547
    case 0:
12548
      insn = gen_avx2_vec_set_lo_v4di;
12549
      break;
12550
    case 1:
12551
      insn = gen_avx2_vec_set_hi_v4di;
12552
      break;
12553
    default:
12554
      gcc_unreachable ();
12555
    }
12556
 
12557
  emit_insn (insn (operands[0], operands[1], operands[2]));
12558
  DONE;
12559
})
12560
 
12561
;; AVX2 arithmetic right shift where the shift count (operand 2) is itself a
;; VI4_AVX2 vector rather than a scalar; emitted as vpsravd.
(define_insn "avx2_ashrv"
12562
  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
12563
        (ashiftrt:VI4_AVX2
12564
          (match_operand:VI4_AVX2 1 "register_operand" "x")
12565
          (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")))]
12566
  "TARGET_AVX2"
12567
  "vpsravd\t{%2, %1, %0|%0, %1, %2}"
12568
  [(set_attr "type" "sseishft")
12569
   (set_attr "prefix" "vex")
12570
   (set_attr "mode" "")])
12571
 
12572
;; AVX2 shifts from the any_lshift code iterator on VI48_AVX2 modes, with a
;; vector shift count in operand 2.
(define_insn "avx2_v"
12573
  [(set (match_operand:VI48_AVX2 0 "register_operand" "=x")
12574
        (any_lshift:VI48_AVX2
12575
          (match_operand:VI48_AVX2 1 "register_operand" "x")
12576
          (match_operand:VI48_AVX2 2 "nonimmediate_operand" "xm")))]
12577
  "TARGET_AVX2"
12578
  "vpv\t{%2, %1, %0|%0, %1, %2}"
12579
  [(set_attr "type" "sseishft")
12580
   (set_attr "prefix" "vex")
12581
   (set_attr "mode" "")])
12582
 
12583
;; Build a V_256 vector from two halves (operand 1 low, operand 2 high).
;; Alternative 0 (operand 2 in a register or memory): vinsert at position 1
;; into the 256-bit view of operand 1 (%t1).
;; Alternative 1 (operand 2 matches constraint "C"): only a 128-bit move of
;; operand 1 into the low part of the destination (%x0) is emitted; the move
;; mnemonic is chosen from the insn's "mode" attribute.
;; NOTE(review): "C" is presumably the all-zeros vector constant, with the
;; VEX-encoded 128-bit move clearing the upper half -- confirm.
(define_insn "avx_vec_concat"
12584
  [(set (match_operand:V_256 0 "register_operand" "=x,x")
12585
        (vec_concat:V_256
12586
          (match_operand: 1 "register_operand" "x,x")
12587
          (match_operand: 2 "vector_move_operand" "xm,C")))]
12588
  "TARGET_AVX"
12589
{
12590
  switch (which_alternative)
12591
    {
12592
    case 0:
12593
      return "vinsert\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12594
    case 1:
12595
      switch (get_attr_mode (insn))
12596
        {
12597
        case MODE_V8SF:
12598
          return "vmovaps\t{%1, %x0|%x0, %1}";
12599
        case MODE_V4DF:
12600
          return "vmovapd\t{%1, %x0|%x0, %1}";
12601
        default:
12602
          return "vmovdqa\t{%1, %x0|%x0, %1}";
12603
        }
12604
    default:
12605
      gcc_unreachable ();
12606
    }
12607
}
12608
  [(set_attr "type" "sselog,ssemov")
12609
   (set_attr "prefix_extra" "1,*")
12610
   (set_attr "length_immediate" "1,*")
12611
   (set_attr "prefix" "vex")
12612
   (set_attr "mode" "")])
12613
 
12614
;; F16C: convert half-precision values held in a V8HI register to single
;; precision, producing a V4SF.  The conversion is modelled as a V8SF unspec
;; of the whole V8HI input from which the low four results are selected, so
;; the selector must name elements 0, 1, 2 and 3 exactly once each.  (The
;; selector previously read 0 1 1 2, which duplicated element 1 and dropped
;; element 3 and so did not describe the instruction.)
(define_insn "vcvtph2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
                       UNSPEC_VCVTPH2PS)
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "TARGET_F16C"
  "vcvtph2ps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF")])
12626
 
12627
;; F16C: memory-source form of vcvtph2ps.  A V4HI memory operand is
;; converted directly to V4SF (UNSPEC_VCVTPH2PS), with no vec_select.
(define_insn "*vcvtph2ps_load"
12628
  [(set (match_operand:V4SF 0 "register_operand" "=x")
12629
        (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
12630
                     UNSPEC_VCVTPH2PS))]
12631
  "TARGET_F16C"
12632
  "vcvtph2ps\t{%1, %0|%0, %1}"
12633
  [(set_attr "type" "ssecvt")
12634
   (set_attr "prefix" "vex")
12635
   (set_attr "mode" "V8SF")])
12636
 
12637
;; F16C: 256-bit vcvtph2ps.  All eight halves of V8HI operand 1 (register
;; or memory) are converted to a V8SF result.
(define_insn "vcvtph2ps256"
12638
  [(set (match_operand:V8SF 0 "register_operand" "=x")
12639
        (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
12640
                     UNSPEC_VCVTPH2PS))]
12641
  "TARGET_F16C"
12642
  "vcvtph2ps\t{%1, %0|%0, %1}"
12643
  [(set_attr "type" "ssecvt")
12644
   (set_attr "prefix" "vex")
12645
   (set_attr "mode" "V8SF")])
12646
 
12647
;; F16C expander: convert V4SF operand 1 to half precision using the imm8 in
;; operand 2.  The V4HI conversion result forms the low half of the V8HI
;; destination; the high half is operand 3, which the preparation statement
;; sets to the V4HI zero constant.
(define_expand "vcvtps2ph"
12648
  [(set (match_operand:V8HI 0 "register_operand" "")
12649
        (vec_concat:V8HI
12650
          (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
12651
                        (match_operand:SI 2 "const_0_to_255_operand" "")]
12652
                       UNSPEC_VCVTPS2PH)
12653
          (match_dup 3)))]
12654
  "TARGET_F16C"
12655
  "operands[3] = CONST0_RTX (V4HImode);")
12656
 
12657
;; F16C: register-destination vcvtps2ph.  Matches the form produced by the
;; vcvtps2ph expander: the V4HI conversion result concatenated with a
;; constant-zero V4HI upper half (operand 3, const0_operand).
(define_insn "*vcvtps2ph"
12658
  [(set (match_operand:V8HI 0 "register_operand" "=x")
12659
        (vec_concat:V8HI
12660
          (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12661
                        (match_operand:SI 2 "const_0_to_255_operand" "N")]
12662
                       UNSPEC_VCVTPS2PH)
12663
          (match_operand:V4HI 3 "const0_operand" "")))]
12664
  "TARGET_F16C"
12665
  "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12666
  [(set_attr "type" "ssecvt")
12667
   (set_attr "prefix" "vex")
12668
   (set_attr "mode" "V4SF")])
12669
 
12670
;; F16C: memory-destination vcvtps2ph.  The V4HI conversion result of V4SF
;; operand 1 is stored directly to memory; no zero upper half is involved.
(define_insn "*vcvtps2ph_store"
12671
  [(set (match_operand:V4HI 0 "memory_operand" "=m")
12672
        (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12673
                      (match_operand:SI 2 "const_0_to_255_operand" "N")]
12674
                     UNSPEC_VCVTPS2PH))]
12675
  "TARGET_F16C"
12676
  "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12677
  [(set_attr "type" "ssecvt")
12678
   (set_attr "prefix" "vex")
12679
   (set_attr "mode" "V4SF")])
12680
 
12681
;; F16C: 256-bit vcvtps2ph.  Converts V8SF operand 1 to a V8HI result that
;; may live in a register or in memory; operand 2 is the imm8.
(define_insn "vcvtps2ph256"
12682
  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
12683
        (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
12684
                      (match_operand:SI 2 "const_0_to_255_operand" "N")]
12685
                     UNSPEC_VCVTPS2PH))]
12686
  "TARGET_F16C"
12687
  "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12688
  [(set_attr "type" "ssecvt")
12689
   (set_attr "prefix" "vex")
12690
   (set_attr "mode" "V8SF")])
12691
 
12692
;; For gather* insn patterns
12693
;; Result modes for which the gather patterns are instantiated.
(define_mode_iterator VEC_GATHER_MODE
12694
                      [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Maps each gather mode to V4SI, except V8SI/V8SF which map to V8SI.
;; NOTE(review): presumably the index-vector mode for the 32-bit-index
;; ("si") gathers -- confirm against the patterns that use it.
12695
(define_mode_attr VEC_GATHER_IDXSI
12696
                      [(V2DI "V4SI") (V2DF "V4SI")
12697
                       (V4DI "V4SI") (V4DF "V4SI")
12698
                       (V4SI "V4SI") (V4SF "V4SI")
12699
                       (V8SI "V8SI") (V8SF "V8SI")])
;; Maps 128-bit gather modes to V2DI and 256-bit gather modes to V4DI.
;; NOTE(review): presumably the index-vector mode for the 64-bit-index
;; ("di") gathers -- confirm.
12700
(define_mode_attr VEC_GATHER_IDXDI
12701
                      [(V2DI "V2DI") (V2DF "V2DI")
12702
                       (V4DI "V4DI") (V4DF "V4DI")
12703
                       (V4SI "V2DI") (V4SF "V2DI")
12704
                       (V8SI "V4DI") (V8SF "V4DI")])
;; Identity for every mode except V8SI and V8SF, which map to the
;; half-width V4SI and V4SF.
;; NOTE(review): presumably the source/mask operand mode of the "di"
;; gathers, where four 64-bit indices can fetch only four 32-bit
;; elements -- confirm.
12705
(define_mode_attr VEC_GATHER_SRCDI
12706
                      [(V2DI "V2DI") (V2DF "V2DF")
12707
                       (V4DI "V4DI") (V4DF "V4DF")
12708
                       (V4SI "V4SI") (V4SF "V4SF")
12709
                       (V8SI "V4SI") (V8SF "V4SF")])
12710
 
12711
;; Expander for AVX2 gathers with the "si" index operand (operand 3).
;; Operands: 0 result, 1 source register, 2 base address, 3 index vector,
;; 4 mask register, 5 scale (1, 2, 4 or 8), 6 scratch.  The preparation
;; statement wraps base, index and scale into an UNSPEC_VSIBADDR address in
;; Pmode and stores it as operand 7 for the match_par_dup.
;; The scale predicate is spelled "const1248_operand", matching the insn
;; patterns; it previously carried a stray trailing space inside the string.
;; NOTE(review): the iterator substitutions (<mode>, <ssescalarmode>,
;; <VEC_GATHER_IDXSI>) were absent from the listing this was reviewed from
;; and have been reinstated -- confirm against the pristine file.
(define_expand "avx2_gathersi<mode>"
  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
                   (unspec:VEC_GATHER_MODE
                     [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
                      (mem:<ssescalarmode>
                        (match_par_dup 7
                          [(match_operand 2 "vsib_address_operand" "")
                           (match_operand:<VEC_GATHER_IDXSI>
                              3 "register_operand" "")
                           (match_operand:SI 5 "const1248_operand" "")]))
                      (mem:BLK (scratch))
                      (match_operand:VEC_GATHER_MODE 4 "register_operand" "")]
                     UNSPEC_GATHER))
              (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
  "TARGET_AVX2"
{
  operands[7]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
                                        operands[5]), UNSPEC_VSIBADDR);
})
12731
 
12732
;; Gather insn for the "si" index form.  Operand 2 (source) is tied to the
;; earlyclobber result (constraint "0") and operand 5 (mask) is tied to the
;; earlyclobber scratch operand 1 (constraint "1"), which the insn clobbers.
;; Operand 7 is the memory reference whose address is the UNSPEC_VSIBADDR
;; built from base (3), index (4) and scale (6).
(define_insn "*avx2_gathersi"
12733
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12734
        (unspec:VEC_GATHER_MODE
12735
          [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
12736
           (match_operator: 7 "vsib_mem_operator"
12737
             [(unspec:P
12738
                [(match_operand:P 3 "vsib_address_operand" "p")
12739
                 (match_operand: 4 "register_operand" "x")
12740
                 (match_operand:SI 6 "const1248_operand" "n")]
12741
                UNSPEC_VSIBADDR)])
12742
           (mem:BLK (scratch))
12743
           (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
12744
          UNSPEC_GATHER))
12745
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12746
  "TARGET_AVX2"
12747
  "vgatherd\t{%1, %7, %0|%0, %7, %1}"
12748
  [(set_attr "type" "ssemov")
12749
   (set_attr "prefix" "vex")
12750
   (set_attr "mode" "")])
12751
 
12752
;; Variant of the "si" gather insn in which the first unspec element is
;; (pc) instead of a source register operand, so no input is tied to the
;; result.  Operand numbering shifts down by one accordingly: base 2,
;; index 3, mask 4 (tied to scratch 1), scale 5, memory operator 6.
(define_insn "*avx2_gathersi_2"
12753
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12754
        (unspec:VEC_GATHER_MODE
12755
          [(pc)
12756
           (match_operator: 6 "vsib_mem_operator"
12757
             [(unspec:P
12758
                [(match_operand:P 2 "vsib_address_operand" "p")
12759
                 (match_operand: 3 "register_operand" "x")
12760
                 (match_operand:SI 5 "const1248_operand" "n")]
12761
                UNSPEC_VSIBADDR)])
12762
           (mem:BLK (scratch))
12763
           (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
12764
          UNSPEC_GATHER))
12765
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12766
  "TARGET_AVX2"
12767
  "vgatherd\t{%1, %6, %0|%0, %6, %1}"
12768
  [(set_attr "type" "ssemov")
12769
   (set_attr "prefix" "vex")
12770
   (set_attr "mode" "")])
12771
 
12772
;; Expander for AVX2 gathers with the "di" index operand (operand 3).
;; Operands: 0 result, 1 source register, 2 base address, 3 index vector,
;; 4 mask register, 5 scale (1, 2, 4 or 8), 6 scratch.  The preparation
;; statement wraps base, index and scale into an UNSPEC_VSIBADDR address in
;; Pmode and stores it as operand 7 for the match_par_dup.
;; The scale predicate is spelled "const1248_operand", matching the insn
;; patterns; it previously carried a stray trailing space inside the string.
;; NOTE(review): the iterator substitutions (<mode>, <ssescalarmode>,
;; <VEC_GATHER_IDXDI>, <VEC_GATHER_SRCDI>) were absent from the listing this
;; was reviewed from and have been reinstated -- confirm against the
;; pristine file.
(define_expand "avx2_gatherdi<mode>"
  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
                   (unspec:VEC_GATHER_MODE
                     [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "")
                      (mem:<ssescalarmode>
                        (match_par_dup 7
                          [(match_operand 2 "vsib_address_operand" "")
                           (match_operand:<VEC_GATHER_IDXDI>
                              3 "register_operand" "")
                           (match_operand:SI 5 "const1248_operand" "")]))
                      (mem:BLK (scratch))
                      (match_operand:<VEC_GATHER_SRCDI>
                        4 "register_operand" "")]
                     UNSPEC_GATHER))
              (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
  "TARGET_AVX2"
{
  operands[7]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
                                        operands[5]), UNSPEC_VSIBADDR);
})
12793
 
12794
;; Gather insn for the "di" index form.  Operand 2 (source) is tied to the
;; earlyclobber result and operand 5 (mask) to the earlyclobber scratch
;; operand 1.  The output template prints the tied operands %2 and %5
;; rather than %0 and %1, i.e. the registers are printed in the modes of
;; the source and mask operands.
(define_insn "*avx2_gatherdi"
12795
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12796
        (unspec:VEC_GATHER_MODE
12797
          [(match_operand: 2 "register_operand" "0")
12798
           (match_operator: 7 "vsib_mem_operator"
12799
             [(unspec:P
12800
                [(match_operand:P 3 "vsib_address_operand" "p")
12801
                 (match_operand: 4 "register_operand" "x")
12802
                 (match_operand:SI 6 "const1248_operand" "n")]
12803
                UNSPEC_VSIBADDR)])
12804
           (mem:BLK (scratch))
12805
           (match_operand: 5 "register_operand" "1")]
12806
          UNSPEC_GATHER))
12807
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12808
  "TARGET_AVX2"
12809
  "vgatherq\t{%5, %7, %2|%2, %7, %5}"
12810
  [(set_attr "type" "ssemov")
12811
   (set_attr "prefix" "vex")
12812
   (set_attr "mode" "")])
12813
 
12814
;; Variant of the "di" gather insn in which the first unspec element is
;; (pc) instead of a source register operand.  With no source operand to
;; print, the output code picks between two templates based on a mode
;; comparison: one prints the destination through %x0, the other as %0.
(define_insn "*avx2_gatherdi_2"
12815
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12816
        (unspec:VEC_GATHER_MODE
12817
          [(pc)
12818
           (match_operator: 6 "vsib_mem_operator"
12819
             [(unspec:P
12820
                [(match_operand:P 2 "vsib_address_operand" "p")
12821
                 (match_operand: 3 "register_operand" "x")
12822
                 (match_operand:SI 5 "const1248_operand" "n")]
12823
                UNSPEC_VSIBADDR)])
12824
           (mem:BLK (scratch))
12825
           (match_operand: 4 "register_operand" "1")]
12826
          UNSPEC_GATHER))
12827
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12828
  "TARGET_AVX2"
12829
{
12830
  if (mode != mode)
12831
    return "vgatherq\t{%4, %6, %x0|%x0, %6, %4}";
12832
  return "vgatherq\t{%4, %6, %0|%0, %6, %4}";
12833
}
12834
  [(set_attr "type" "ssemov")
12835
   (set_attr "prefix" "vex")
12836
   (set_attr "mode" "")])
12837
 
12838
;; Merging 64-bit-index gather for the VI4F_256 modes where only the low
;; four elements of the 256-bit unspec result are used: the set source is a
;; vec_select of elements 0..3, and the destination has the narrower
;; <VEC_GATHER_SRCDI> mode.
;;
;; Operands:
;;   0  destination vector (earlyclobber).
;;   1  VI4F_256 scratch register that the mask (operand 5) is tied to.
;;   2  merge source, tied to operand 0 through the "0" constraint.
;;   3  base address, 4 index vector, 6 scale factor (const1248_operand);
;;      wrapped in UNSPEC_VSIBADDR and matched as the address of memory
;;      operator 7.
;;   5  mask vector, tied to scratch 1 through the "1" constraint.
(define_insn "*avx2_gatherdi<mode>_3"
  [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
        (vec_select:<VEC_GATHER_SRCDI>
          (unspec:VI4F_256
            [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
             (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
               [(unspec:P
                  [(match_operand:P 3 "vsib_address_operand" "p")
                   (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
                   (match_operand:SI 6 "const1248_operand" "n")]
                  UNSPEC_VSIBADDR)])
             (mem:BLK (scratch))
             (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
            UNSPEC_GATHER)
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (clobber (match_scratch:VI4F_256 1 "=&x"))]
  "TARGET_AVX2"
  ;; Operand 0 already has the narrow mode, so no %x modifier is needed.
  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
12860
 
12861
;; Non-merging 64-bit-index gather for the VI4F_256 modes where only the
;; low four elements of the 256-bit unspec result are used.  The
;; merge-source slot holds (pc), so no input is tied to the destination;
;; the set source is a vec_select of elements 0..3.
;;
;; Operands:
;;   0  destination vector in <VEC_GATHER_SRCDI> mode (earlyclobber).
;;   1  VI4F_256 scratch register that the mask (operand 4) is tied to.
;;   2  base address, 3 index vector, 5 scale factor (const1248_operand);
;;      wrapped in UNSPEC_VSIBADDR and matched as the address of memory
;;      operator 6.
;;   4  mask vector, tied to scratch 1 through the "1" constraint.
(define_insn "*avx2_gatherdi<mode>_4"
  [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
        (vec_select:<VEC_GATHER_SRCDI>
          (unspec:VI4F_256
            [(pc)
             (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
               [(unspec:P
                  [(match_operand:P 2 "vsib_address_operand" "p")
                   (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
                   (match_operand:SI 5 "const1248_operand" "n")]
                  UNSPEC_VSIBADDR)])
             (mem:BLK (scratch))
             (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
            UNSPEC_GATHER)
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (clobber (match_scratch:VI4F_256 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.