OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-stable/] [gcc-4.5.1/] [gcc/] [config/] [i386/] [sse.md] - Blame information for rev 856

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 282 jeremybenn
;; GCC machine description for SSE instructions
2
;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
3
;; Free Software Foundation, Inc.
4
;;
5
;; This file is part of GCC.
6
;;
7
;; GCC is free software; you can redistribute it and/or modify
8
;; it under the terms of the GNU General Public License as published by
9
;; the Free Software Foundation; either version 3, or (at your option)
10
;; any later version.
11
;;
12
;; GCC is distributed in the hope that it will be useful,
13
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
;; GNU General Public License for more details.
16
;;
17
;; You should have received a copy of the GNU General Public License
18
;; along with GCC; see the file COPYING3.  If not see
19
;; <http://www.gnu.org/licenses/>.
20
 
21
 
22
;; Mode iterators.  Each iterator names a set of machine modes; a pattern
;; written over an iterator is instantiated once per listed mode.

;; 16 byte integral modes handled by SSE
(define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])

;; All 16-byte vector modes handled by SSE
(define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Same as SSEMODE, plus V1TI.
(define_mode_iterator SSEMODE16 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF])

;; 32 byte integral vector modes handled by AVX
(define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])

;; All 32-byte vector modes handled by AVX
(define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])

;; All QI vector modes handled by AVX
(define_mode_iterator AVXMODEQI [V32QI V16QI])

;; All DI vector modes handled by AVX
(define_mode_iterator AVXMODEDI [V4DI V2DI])

;; All vector modes handled by AVX
(define_mode_iterator AVXMODE
  [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Same as AVXMODE, plus V1TI.
(define_mode_iterator AVXMODE16
  [V16QI V8HI V4SI V2DI V1TI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])

;; Mix-n-match
(define_mode_iterator SSEMODE12 [V16QI V8HI])
(define_mode_iterator SSEMODE24 [V8HI V4SI])
(define_mode_iterator SSEMODE14 [V16QI V4SI])
(define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
(define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
(define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
(define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
(define_mode_iterator FMA4MODEF4 [V8SF V4DF])
(define_mode_iterator SSEMODEF2P [V4SF V2DF])

(define_mode_iterator AVX256MODEF2P [V8SF V4DF])
(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
(define_mode_iterator AVX256MODE24P [V8SI V8SF V4DI V4DF])
(define_mode_iterator AVX256MODE4P [V4DI V4DF])
(define_mode_iterator AVX256MODE8P [V8SI V8SF])
(define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
(define_mode_iterator AVXMODEF4P [V4SF V4DF])
(define_mode_iterator AVXMODEFDP [V2DF V4DF])
(define_mode_iterator AVXMODEFSP [V4SF V8SF])
(define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
(define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])

;; Int-float size matches
(define_mode_iterator SSEMODE4S [V4SF V4SI])
(define_mode_iterator SSEMODE2D [V2DF V2DI])

;; Modes handled by integer vcond pattern
;; (V2DI is only enabled when TARGET_SSE4_2 holds.)
(define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI
                                    (V2DI "TARGET_SSE4_2")])

;; Modes handled by vec_extract_even/odd pattern.
;; Each mode carries its own enabling condition.
(define_mode_iterator SSEMODE_EO
  [(V4SF "TARGET_SSE")
   (V2DF "TARGET_SSE2")
   (V2DI "TARGET_SSE2") (V4SI "TARGET_SSE2")
   (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
   (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
85
 
86
;; Mode attributes.  Each attribute maps a mode to a string that is
;; substituted wherever the attribute is referenced inside a pattern
;; written over a mode iterator.

;; Mapping from float mode to required SSE level
(define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])

;; Mapping from integer vector mode to mnemonic suffix
(define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])

;; Mapping of the fma4 suffix
(define_mode_attr fma4modesuffixf4 [(V8SF "ps") (V4DF "pd")])
(define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
                                    (V4SF "ss") (V2DF "sd")])

;; Mapping of the avx suffix
(define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
                                   (V4SF "ps") (V2DF "pd")])

;; Single-letter precision suffix: "s" for V4SF, "d" for V2DF.
(define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])

(define_mode_attr ssescalarmodesuffix2s [(V4SF "ss") (V4SI "d")])

;; Mapping of the max integer size for xop rotate immediate constraint
(define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])

;; Mapping of vector modes back to the scalar modes
(define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
                                 (V16QI "QI") (V8HI "HI")
                                 (V4SI "SI") (V2DI "DI")])

;; Mapping of vector modes to a vector mode of double size
(define_mode_attr ssedoublesizemode
  [(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI")
   (V8HI "V16HI") (V16QI "V32QI")
   (V4DF "V8DF") (V8SF "V16SF")
   (V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")])

;; Number of scalar elements in each vector type
(define_mode_attr ssescalarnum
  [(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
   (V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")])

;; Mapping for AVX
;; avxvecmode: value used for the insn "mode" attribute; integer vectors
;; collapse to TI (16-byte) or OI (32-byte), float vectors map to themselves.
(define_mode_attr avxvecmode
  [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
   (V4SF "V4SF") (V8SF "V8SF") (V2DF "V2DF") (V4DF "V4DF")
   (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")])
;; avxvecpsmode: single-precision float vector mode of the same size.
(define_mode_attr avxvecpsmode
  [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
   (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
;; avxhalfvecmode: vector mode with the same element type and half the elements.
(define_mode_attr avxhalfvecmode
  [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
   (V8SF "V4SF") (V4DF "V2DF")
   (V16QI  "V8QI") (V8HI  "V4HI") (V4SI "V2SI") (V4SF "V2SF")])
;; avxscalarmode: element mode of the vector.
(define_mode_attr avxscalarmode
  [(V16QI "QI") (V8HI  "HI") (V4SI "SI") (V2DI "DI") (V4SF "SF") (V2DF "DF")
   (V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI") (V8SF "SF") (V4DF "DF")])
;; avxcvtvecmode: int <-> float vector mode with the same element count and size.
(define_mode_attr avxcvtvecmode
  [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
;; avxpermvecmode: integer vector mode matching a float vector mode.
(define_mode_attr avxpermvecmode
  [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
;; avxmodesuffixf2c: single-letter precision suffix ("s" or "d").
(define_mode_attr avxmodesuffixf2c
  [(V4SF "s") (V2DF "d") (V8SI "s") (V8SF "s") (V4DI "d") (V4DF "d")])
(define_mode_attr avxmodesuffixp
 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
  (V4DF "pd")])
;; avxmodesuffix: "" for 16-byte modes, "256" for 32-byte modes.
(define_mode_attr avxmodesuffix
  [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
   (V8SI "256") (V8SF "256") (V4DF "256")])

;; Mapping of immediate bits for blend instructions
(define_mode_attr blendbits
  [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])

;; Mapping of immediate bits for pinsr instructions
(define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
159
 
160
;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
161
 
162
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
163
;;
164
;; Move patterns
165
;;
166
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
167
 
168
;; Move expander for every 32-byte AVX vector mode.  All of the work is
;; delegated to ix86_expand_vector_move; DONE means the RTL template
;; below is never emitted directly.
(define_expand "mov<mode>"
  [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
        (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
  "TARGET_AVX"
{
  ix86_expand_vector_move (<MODE>mode, operands);
  DONE;
})
176
 
177
;; AVX vector move for every mode in AVXMODE16.
;;   alternative 0: "C" constant into a register, opcode chosen by
;;                  standard_sse_constant_opcode;
;;   alternative 1: register or memory into a register;
;;   alternative 2: register into memory.
;; The insn condition requires at least one operand to be a register, so
;; a memory-to-memory move never matches.  For alternatives 1 and 2 the
;; mnemonic follows the insn's "mode" attribute: vmovaps for V8SF/V4SF,
;; vmovapd for V4DF/V2DF, vmovdqa otherwise.
(define_insn "*avx_mov<mode>_internal"
  [(set (match_operand:AVXMODE16 0 "nonimmediate_operand" "=x,x ,m")
        (match_operand:AVXMODE16 1 "nonimmediate_or_sse_const_operand"  "C ,xm,x"))]
  "TARGET_AVX
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
      return standard_sse_constant_opcode (insn, operands[1]);
    case 1:
    case 2:
      switch (get_attr_mode (insn))
        {
        case MODE_V8SF:
        case MODE_V4SF:
          return "vmovaps\t{%1, %0|%0, %1}";
        case MODE_V4DF:
        case MODE_V2DF:
          return "vmovapd\t{%1, %0|%0, %1}";
        default:
          return "vmovdqa\t{%1, %0|%0, %1}";
        }
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,ssemov,ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<avxvecmode>")])
208
 
209
;; All of these patterns are enabled for SSE1 as well as SSE2.
;; This is essential for maintaining stable calling conventions.

;; Move expander for every 16-byte SSE vector mode (including V1TI).
;; The work is delegated to ix86_expand_vector_move.
(define_expand "mov<mode>"
  [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
        (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move (<MODE>mode, operands);
  DONE;
})
220
 
221
;; SSE vector move for every mode in SSEMODE16.
;;   alternative 0: "C" constant into a register, opcode chosen by
;;                  standard_sse_constant_opcode;
;;   alternative 1: register or memory into a register;
;;   alternative 2: register into memory.
;; At least one operand must be a register.  The mnemonic for
;; alternatives 1 and 2 follows the computed "mode" attribute:
;; movaps for V4SF, movapd for V2DF, movdqa otherwise.
;;
;; The "mode" attribute is V4SF when optimizing the function for size,
;; when SSE2 is not available, or for a store (alternative 2) when
;; TARGET_SSE_TYPELESS_STORES is set.  Otherwise it is V4SF or V2DF for
;; those two modes and TI for everything else.
(define_insn "*mov<mode>_internal"
  [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "=x,x ,m")
        (match_operand:SSEMODE16 1 "nonimmediate_or_sse_const_operand"  "C ,xm,x"))]
  "TARGET_SSE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
      return standard_sse_constant_opcode (insn, operands[1]);
    case 1:
    case 2:
      switch (get_attr_mode (insn))
        {
        case MODE_V4SF:
          return "movaps\t{%1, %0|%0, %1}";
        case MODE_V2DF:
          return "movapd\t{%1, %0|%0, %1}";
        default:
          return "movdqa\t{%1, %0|%0, %1}";
        }
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,ssemov,ssemov")
   (set (attr "mode")
        (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
                         (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
                    (and (eq_attr "alternative" "2")
                         (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
                             (const_int 0))))
                 (const_string "V4SF")
               (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
                 (const_string "V4SF")
               (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
                 (const_string "V2DF")
              ]
          (const_string "TI")))])
261
 
262
;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
;; from memory, we'd prefer to load the memory directly into the %xmm
;; register.  To facilitate this happy circumstance, this pattern won't
;; split until after register allocation.  If the 64-bit value didn't
;; come from memory, this is the best we can do.  This is much better
;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; from there.
;;
;; Operand 0 is the V4SI destination register, operand 1 the DI source
;; (general registers or memory) and operand 2 a V4SI scratch that is
;; only needed by the register alternative.  The insn always outputs "#"
;; and is split once reload has completed.

(define_insn_and_split "movdi_to_sse"
  [(parallel
    [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
          (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
     (clobber (match_scratch:V4SI 2 "=&x,X"))])]
  "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
 if (register_operand (operands[1], DImode))
   {
      /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
         Assemble the 64-bit DImode value in an xmm register.  */
      /* SImode subreg at byte 0 of the source goes into operand 0 ...  */
      emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
                                  gen_rtx_SUBREG (SImode, operands[1], 0)));
      /* ... and the SImode subreg at byte 4 into the scratch.  */
      emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
                                  gen_rtx_SUBREG (SImode, operands[1], 4)));
      /* Interleave the two to form the result in operand 0.  */
      emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
                                             operands[2]));
    }
 else if (memory_operand (operands[1], DImode))
   /* Memory source: concatenate it with zero, viewing the destination
      as V2DI.  */
   emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
                                  operands[1], const0_rtx));
 else
   gcc_unreachable ();
})
298
 
299
;; After reload, rewrite a V4SF load whose source satisfies
;; zero_extended_scalar_load_operand as a vec_merge of the duplicated
;; SFmode scalar (operand 1, narrowed via simplify_gen_subreg) with the
;; V4SF zero vector (operand 2), selecting element 0 from the scalar.
(define_split
  [(set (match_operand:V4SF 0 "register_operand" "")
        (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
  "TARGET_SSE && reload_completed"
  [(set (match_dup 0)
        (vec_merge:V4SF
          (vec_duplicate:V4SF (match_dup 1))
          (match_dup 2)
          (const_int 1)))]
{
  operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
  operands[2] = CONST0_RTX (V4SFmode);
})
312
 
313
;; After reload, rewrite a V2DF load whose source satisfies
;; zero_extended_scalar_load_operand as a vec_concat of the DFmode scalar
;; (operand 1, narrowed via simplify_gen_subreg) with DFmode zero
;; (operand 2).
(define_split
  [(set (match_operand:V2DF 0 "register_operand" "")
        (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
{
  operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
  operands[2] = CONST0_RTX (DFmode);
})
322
 
323
;; Push expander for 32-byte AVX vector registers; delegates to
;; ix86_expand_push.
(define_expand "push<mode>1"
  [(match_operand:AVX256MODE 0 "register_operand" "")]
  "TARGET_AVX"
{
  ix86_expand_push (<MODE>mode, operands[0]);
  DONE;
})
330
 
331
;; Push expander for 16-byte SSE vector registers; delegates to
;; ix86_expand_push.
(define_expand "push<mode>1"
  [(match_operand:SSEMODE16 0 "register_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_push (<MODE>mode, operands[0]);
  DONE;
})
338
 
339
;; Misaligned move expander for 32-byte AVX vector modes; delegates to
;; ix86_expand_vector_move_misalign.
(define_expand "movmisalign<mode>"
  [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
        (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
  "TARGET_AVX"
{
  ix86_expand_vector_move_misalign (<MODE>mode, operands);
  DONE;
})
347
 
348
;; Misaligned move expander for 16-byte SSE vector modes; delegates to
;; ix86_expand_vector_move_misalign.
(define_expand "movmisalign<mode>"
  [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
        (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move_misalign (<MODE>mode, operands);
  DONE;
})
356
 
357
;; AVX UNSPEC_MOVU move (vmovups / vmovupd) for the float vector modes in
;; AVXMODEF2P.  Either a load into a register or a store from one; the
;; condition rejects memory-to-memory.  The name expands to
;; avx_movups, avx_movupd, avx_movups256 and avx_movupd256.
(define_insn "avx_movup<avxmodesuffixf2c><avxmodesuffix>"
  [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
        (unspec:AVXMODEF2P
          [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
          UNSPEC_MOVU))]
  "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])
369
 
370
;; Build a V2DI whose low element is element 0 of operand 1 and whose
;; high element is zero, using movq (or vmovq: "%v" together with the
;; "maybe_vex" prefix attribute selects the encoding).
(define_insn "sse2_movq128"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (vec_concat:V2DI
          (vec_select:DI
            (match_operand:V2DI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))
          (const_int 0)))]
  "TARGET_SSE2"
  "%vmovq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
382
 
383
;; SSE UNSPEC_MOVU move (movups / movupd) for V4SF and V2DF.  Either a
;; load into a register or a store from one; memory-to-memory is
;; rejected.  The name expands to sse_movups and sse2_movupd.
(define_insn "<sse>_movup<ssemodesuffixf2c>"
  [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
        (unspec:SSEMODEF2P
          [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
          UNSPEC_MOVU))]
  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "mode" "<MODE>")])
394
 
395
;; AVX UNSPEC_MOVU move (vmovdqu) for the QI vector modes V32QI and
;; V16QI.  Memory-to-memory is rejected.  The name expands to
;; avx_movdqu (V16QI) and avx_movdqu256 (V32QI).
(define_insn "avx_movdqu<avxmodesuffix>"
  [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
        (unspec:AVXMODEQI
          [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
          UNSPEC_MOVU))]
  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "vmovdqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<avxvecmode>")])
406
 
407
;; SSE2 UNSPEC_MOVU move (movdqu) for V16QI.  Either a load into a
;; register or a store from one; memory-to-memory is rejected.
(define_insn "sse2_movdqu"
  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
        (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
                      UNSPEC_MOVU))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "movdqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
417
 
418
;; AVX UNSPEC_MOVNT store (vmovntps / vmovntpd) of a float vector
;; register to memory, for every mode in AVXMODEF2P.
(define_insn "avx_movnt<mode>"
  [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
        (unspec:AVXMODEF2P
          [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
          UNSPEC_MOVNT))]
  "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
  "vmovntp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])
428
 
429
;; SSE UNSPEC_MOVNT store (movntps / movntpd) of a V4SF or V2DF register
;; to memory.  The name expands to sse_movntv4sf and sse2_movntv2df.
(define_insn "<sse>_movnt<mode>"
  [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
        (unspec:SSEMODEF2P
          [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
          UNSPEC_MOVNT))]
  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
  "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "<MODE>")])
438
 
439
;; AVX UNSPEC_MOVNT store (vmovntdq) of a V4DI or V2DI register to
;; memory.
(define_insn "avx_movnt<mode>"
  [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
        (unspec:AVXMODEDI
          [(match_operand:AVXMODEDI 1 "register_operand" "x")]
          UNSPEC_MOVNT))]
  "TARGET_AVX"
  "vmovntdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<avxvecmode>")])
449
 
450
;; SSE2 UNSPEC_MOVNT store (movntdq) of a V2DI register to memory.
(define_insn "sse2_movntv2di"
  [(set (match_operand:V2DI 0 "memory_operand" "=m")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
                     UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movntdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
459
 
460
;; SSE2 UNSPEC_MOVNT store (movnti) of an SImode general register ("r")
;; to memory.
(define_insn "sse2_movntsi"
  [(set (match_operand:SI 0 "memory_operand" "=m")
        (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
                   UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movnti\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_data16" "0")
   (set_attr "mode" "V2DF")])
469
 
470
;; AVX UNSPEC_LDDQU load (vlddqu) from memory into a V32QI or V16QI
;; register.  The name expands to avx_lddqu (V16QI) and avx_lddqu256
;; (V32QI).
(define_insn "avx_lddqu<avxmodesuffix>"
  [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
        (unspec:AVXMODEQI
          [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
          UNSPEC_LDDQU))]
  "TARGET_AVX"
  "vlddqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "movu" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<avxvecmode>")])
481
 
482
;; SSE3 UNSPEC_LDDQU load (lddqu) from memory into a V16QI register.
(define_insn "sse3_lddqu"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
                      UNSPEC_LDDQU))]
  "TARGET_SSE3"
  "lddqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "TI")])
493
 
494
; Expand patterns for non-temporal stores.  At the moment, only those
; that directly map to insns are defined; it would be possible to
; define patterns for other modes that would expand to several insns.

;; Non-temporal store of a V4SF or V2DF register.  The preparation
;; statement is empty, so the UNSPEC_MOVNT template is emitted as-is.
(define_expand "storent<mode>"
  [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
        (unspec:SSEMODEF2P
          [(match_operand:SSEMODEF2P 1 "register_operand" "")]
          UNSPEC_MOVNT))]
  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
  "")
505
 
506
;; Non-temporal store of a scalar float register for the modes in the
;; MODEF iterator (defined outside this chunk), enabled only for
;; TARGET_SSE4A.  The UNSPEC_MOVNT template is emitted as-is.
(define_expand "storent<mode>"
  [(set (match_operand:MODEF 0 "memory_operand" "")
        (unspec:MODEF
          [(match_operand:MODEF 1 "register_operand" "")]
          UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "")
513
 
514
;; Non-temporal store of a V2DI register; the UNSPEC_MOVNT template is
;; emitted as-is.
(define_expand "storentv2di"
  [(set (match_operand:V2DI 0 "memory_operand" "")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
                     UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "")
520
 
521
;; Non-temporal store of an SImode register; the UNSPEC_MOVNT template
;; is emitted as-is.
(define_expand "storentsi"
  [(set (match_operand:SI 0 "memory_operand" "")
        (unspec:SI [(match_operand:SI 1 "register_operand" "")]
                   UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "")
527
 
528
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
529
;;
530
;; Parallel floating point arithmetic
531
;;
532
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
533
 
534
;; Expander for the absneg code iterator (defined outside this chunk)
;; on V4SF and V2DF.  The work is delegated to
;; ix86_expand_fp_absneg_operator, which receives the RTX code and the
;; vector mode.
(define_expand "<code><mode>2"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
        (absneg:SSEMODEF2P
          (match_operand:SSEMODEF2P 1 "register_operand" "")))]
  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
  "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
540
 
541
;; Expander for the plusminus code iterator (defined outside this chunk)
;; on V8SF and V4DF.  ix86_fixup_binary_operands_no_copy adjusts the
;; operands in place; the template is then emitted.
(define_expand "<plusminus_insn><mode>3"
  [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
        (plusminus:AVX256MODEF2P
          (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
          (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
  "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
548
 
549
;; AVX packed float plusminus operation for every mode in AVXMODEF2P.
;; Three-operand form: the destination is independent of both sources
;; and only operand 2 may be memory.
(define_insn "*avx_<plusminus_insn><mode>3"
  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
        (plusminus:AVXMODEF2P
          (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "x")
          (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
  "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "v<plusminus_mnemonic>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<avxvecmode>")])
560
 
561
;; Expander for the plusminus code iterator on V4SF and V2DF.
;; ix86_fixup_binary_operands_no_copy adjusts the operands in place; the
;; template is then emitted.
(define_expand "<plusminus_insn><mode>3"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
        (plusminus:SSEMODEF2P
          (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
          (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
568
 
569
;; SSE packed float plusminus operation on V4SF and V2DF.  Two-operand
;; form: operand 1 is tied to the destination (constraint "0") and only
;; operand 2 may be memory.
(define_insn "*<plusminus_insn><mode>3"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
        (plusminus:SSEMODEF2P
          (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0")
          (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "<plusminus_mnemonic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "<MODE>")])
579
 
580
;; AVX scalar plusminus operation on the low element of a V4SF or V2DF
;; vector.  The vec_merge with mask 1 takes element 0 from the
;; arithmetic result and every other element from operand 1.
(define_insn "*avx_vm<plusminus_insn><mode>3"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
        (vec_merge:SSEMODEF2P
          (plusminus:SSEMODEF2P
            (match_operand:SSEMODEF2P 1 "register_operand" "x")
            (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
  "v<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<ssescalarmode>")])
593
 
594
;; SSE scalar plusminus operation on the low element of a V4SF or V2DF
;; vector.  The vec_merge with mask 1 takes element 0 from the
;; arithmetic result and every other element from operand 1, which is
;; tied to the destination.
(define_insn "<sse>_vm<plusminus_insn><mode>3"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
        (vec_merge:SSEMODEF2P
          (plusminus:SSEMODEF2P
            (match_operand:SSEMODEF2P 1 "register_operand" "0")
            (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
  "<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "<ssescalarmode>")])
606
 
607
;; Multiply expander for V8SF and V4DF.
;; ix86_fixup_binary_operands_no_copy adjusts the operands in place; the
;; template is then emitted.
(define_expand "mul<mode>3"
  [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
        (mult:AVX256MODEF2P
          (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
          (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
  "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
614
 
615
;; AVX packed float multiply (vmulps / vmulpd) for every mode in
;; AVXMODEF2P.  The "%" on operand 1 marks operands 1 and 2 as
;; commutative.
(define_insn "*avx_mul<mode>3"
  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
        (mult:AVXMODEF2P
          (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
          (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
  "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
   && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
  "vmulp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssemul")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<avxvecmode>")])
626
 
627
;; Multiply expander for V4SF and V2DF.
;; ix86_fixup_binary_operands_no_copy adjusts the operands in place; the
;; template is then emitted.
(define_expand "mul<mode>3"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
        (mult:SSEMODEF2P
          (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
          (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
634
 
635
;; SSE packed float multiply (mulps / mulpd) on V4SF and V2DF.  Operand 1
;; is tied to the destination and is commutative with operand 2 ("%0").
(define_insn "*mul<mode>3"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
        (mult:SSEMODEF2P
          (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
   && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
  "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemul")
   (set_attr "mode" "<MODE>")])
645
 
646
;; AVX scalar multiply (vmulss / vmulsd) on the low element of a V4SF or
;; V2DF vector.  The vec_merge with mask 1 takes element 0 from the
;; product and every other element from operand 1.
(define_insn "*avx_vmmul<mode>3"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
        (vec_merge:SSEMODEF2P
          (mult:SSEMODEF2P
            (match_operand:SSEMODEF2P 1 "register_operand" "x")
            (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
  "vmuls<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssemul")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<ssescalarmode>")])
659
 
660
;; SSE scalar multiply (mulss / mulsd) on the low element of a V4SF or
;; V2DF vector.  The vec_merge with mask 1 takes element 0 from the
;; product and every other element from operand 1, which is tied to the
;; destination.
(define_insn "<sse>_vmmul<mode>3"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
        (vec_merge:SSEMODEF2P
          (mult:SSEMODEF2P
            (match_operand:SSEMODEF2P 1 "register_operand" "0")
            (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
  "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemul")
   (set_attr "mode" "<ssescalarmode>")])
672
 
673
;; V8SF division expander.  After fixing up the operands, a software
;; division sequence (ix86_emit_swdivsf) replaces the template when all
;; of the following hold: SSE math, TARGET_RECIP, not optimizing the
;; insn for size, finite-math-only, no trapping math and unsafe math
;; optimizations.  Otherwise the div template is emitted.
(define_expand "divv8sf3"
  [(set (match_operand:V8SF 0 "register_operand" "")
        (div:V8SF (match_operand:V8SF 1 "register_operand" "")
                  (match_operand:V8SF 2 "nonimmediate_operand" "")))]
  "TARGET_AVX"
{
  ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);

  if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
      && flag_finite_math_only && !flag_trapping_math
      && flag_unsafe_math_optimizations)
    {
      ix86_emit_swdivsf (operands[0], operands[1],
                         operands[2], V8SFmode);
      DONE;
    }
})
690
 
691
;; V4DF division expander: fix up the operands in place, then emit the
;; div template.
(define_expand "divv4df3"
  [(set (match_operand:V4DF 0 "register_operand" "")
        (div:V4DF (match_operand:V4DF 1 "register_operand" "")
                  (match_operand:V4DF 2 "nonimmediate_operand" "")))]
  "TARGET_AVX"
  "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
697
 
698
;; AVX packed float divide (vdivps / vdivpd) for every mode in
;; AVXMODEF2P.  Operand 1 must be a register; operand 2 may be memory.
(define_insn "avx_div<mode>3"
  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
        (div:AVXMODEF2P
          (match_operand:AVXMODEF2P 1 "register_operand" "x")
          (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
  "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
  "vdivp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssediv")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])
708
 
709
;; V4SF division expander.  A software division sequence
;; (ix86_emit_swdivsf) replaces the template when all of the following
;; hold: SSE math, TARGET_RECIP, optimizing the insn for speed,
;; finite-math-only, no trapping math and unsafe math optimizations.
;; Otherwise the div template is emitted.
(define_expand "divv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (div:V4SF (match_operand:V4SF 1 "register_operand" "")
                  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
      && flag_finite_math_only && !flag_trapping_math
      && flag_unsafe_math_optimizations)
    {
      ix86_emit_swdivsf (operands[0], operands[1],
                         operands[2], V4SFmode);
      DONE;
    }
})
724
 
725
;; V2DF division expander; the div template is emitted as-is.
(define_expand "divv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (div:V2DF (match_operand:V2DF 1 "register_operand" "")
                  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "")
731
 
732
;; AVX-encoded packed float divide (vdivps / vdivpd) on the 16-byte
;; modes V4SF and V2DF, in three-operand form.
(define_insn "*avx_div<mode>3"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
        (div:SSEMODEF2P
          (match_operand:SSEMODEF2P 1 "register_operand" "x")
          (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
  "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
  "vdivp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssediv")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])
742
 
743
;; SSE packed float divide (divps / divpd) on V4SF and V2DF.  Operand 1
;; is tied to the destination.  The name expands to sse_divv4sf3 and
;; sse2_divv2df3.
(define_insn "<sse>_div<mode>3"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
        (div:SSEMODEF2P
          (match_operand:SSEMODEF2P 1 "register_operand" "0")
          (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
  "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssediv")
   (set_attr "mode" "<MODE>")])
752
 
753
;; AVX scalar divide (vdivss / vdivsd) on the low element of a V4SF or
;; V2DF vector.  The vec_merge with mask 1 takes element 0 from the
;; quotient and every other element from operand 1.
(define_insn "*avx_vmdiv<mode>3"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
        (vec_merge:SSEMODEF2P
          (div:SSEMODEF2P
            (match_operand:SSEMODEF2P 1 "register_operand" "x")
            (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
  "vdivs<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssediv")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<ssescalarmode>")])
766
 
767
;; SSE scalar divide (divss / divsd) on the low element of a V4SF or
;; V2DF vector.  The vec_merge with mask 1 takes element 0 from the
;; quotient and every other element from operand 1, which is tied to the
;; destination.
(define_insn "<sse>_vmdiv<mode>3"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
        (vec_merge:SSEMODEF2P
          (div:SSEMODEF2P
            (match_operand:SSEMODEF2P 1 "register_operand" "0")
            (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
  "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssediv")
   (set_attr "mode" "<ssescalarmode>")])
779
 
780
;; AVX UNSPEC_RCP operation (vrcpps) on a V8SF register or memory
;; source.
(define_insn "avx_rcpv8sf2"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (unspec:V8SF
          [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
  "TARGET_AVX"
  "vrcpps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
789
 
790
;; UNSPEC_RCP operation (rcpps, or vrcpps via "%v" / "maybe_vex") on a
;; V4SF register or memory source.
(define_insn "sse_rcpv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF
          [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
  "TARGET_SSE"
  "%vrcpps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "rcp")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V4SF")])
800
 
801
;; AVX scalar UNSPEC_RCP operation (vrcpss).  The vec_merge with mask 1
;; takes element 0 from the UNSPEC_RCP of operand 1 and every other
;; element from operand 2.
(define_insn "*avx_vmrcpv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
                       UNSPEC_RCP)
          (match_operand:V4SF 2 "register_operand" "x")
          (const_int 1)))]
  "TARGET_AVX"
  "vrcpss\t{%1, %2, %0|%0, %2, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "vex")
   (set_attr "mode" "SF")])
813
 
814
;; Scalar reciprocal, SSE two-operand form: the vec_merge mask of 1 takes
;; element 0 from UNSPEC_RCP of operand 1 and the other elements from
;; operand 2, which is tied to the destination ("0").  Emitted as "rcpss".
(define_insn "sse_vmrcpv4sf2"
815
  [(set (match_operand:V4SF 0 "register_operand" "=x")
816
        (vec_merge:V4SF
817
          (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
818
                       UNSPEC_RCP)
819
          (match_operand:V4SF 2 "register_operand" "0")
820
          (const_int 1)))]
821
  "TARGET_SSE"
822
  "rcpss\t{%1, %0|%0, %1}"
823
  [(set_attr "type" "sse")
824
   (set_attr "atom_sse_attr" "rcp")
825
   (set_attr "mode" "SF")])
826
 
827
;; Expander for V8SF sqrt (TARGET_AVX).  When SSE math and TARGET_RECIP are
;; enabled, the insn is optimized for speed, flag_finite_math_only and
;; flag_unsafe_math_optimizations are set and flag_trapping_math is clear, it
;; calls ix86_emit_swsqrtsf (last argument 0) and finishes with DONE.
;; Otherwise the (sqrt ...) template below is emitted unchanged.
;; The speed test is spelled optimize_insn_for_speed_p () to match the
;; sqrtv4sf2 expander; it is the negation of optimize_insn_for_size_p ().
(define_expand "sqrtv8sf2"
828
  [(set (match_operand:V8SF 0 "register_operand" "")
829
        (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
830
  "TARGET_AVX"
831
{
832
  if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
833
      && flag_finite_math_only && !flag_trapping_math
834
      && flag_unsafe_math_optimizations)
835
    {
836
      ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
837
      DONE;
838
    }
839
})
840
 
841
;; V8SF square root of operand 1 (register or memory) for TARGET_AVX,
;; emitted as "vsqrtps".
(define_insn "avx_sqrtv8sf2"
842
  [(set (match_operand:V8SF 0 "register_operand" "=x")
843
        (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
844
  "TARGET_AVX"
845
  "vsqrtps\t{%1, %0|%0, %1}"
846
  [(set_attr "type" "sse")
847
   (set_attr "prefix" "vex")
848
   (set_attr "mode" "V8SF")])
849
 
850
;; Expander for V4SF sqrt (TARGET_SSE).  When SSE math and TARGET_RECIP are
;; enabled, the insn is optimized for speed, flag_finite_math_only and
;; flag_unsafe_math_optimizations are set and flag_trapping_math is clear, it
;; calls ix86_emit_swsqrtsf (last argument 0) and finishes with DONE.
;; Otherwise the (sqrt ...) template below is emitted unchanged.
(define_expand "sqrtv4sf2"
851
  [(set (match_operand:V4SF 0 "register_operand" "")
852
        (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
853
  "TARGET_SSE"
854
{
855
  if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
856
      && flag_finite_math_only && !flag_trapping_math
857
      && flag_unsafe_math_optimizations)
858
    {
859
      ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
860
      DONE;
861
    }
862
})
863
 
864
;; V4SF square root of operand 1 (register or memory) for TARGET_SSE.
;; The "%v" in the template together with prefix "maybe_vex" presumably
;; selects the VEX spelling under AVX -- confirm.
(define_insn "sse_sqrtv4sf2"
865
  [(set (match_operand:V4SF 0 "register_operand" "=x")
866
        (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
867
  "TARGET_SSE"
868
  "%vsqrtps\t{%1, %0|%0, %1}"
869
  [(set_attr "type" "sse")
870
   (set_attr "atom_sse_attr" "sqrt")
871
   (set_attr "prefix" "maybe_vex")
872
   (set_attr "mode" "V4SF")])
873
 
874
;; V4DF square root of operand 1 (register or memory) for TARGET_AVX,
;; emitted as "vsqrtpd".
(define_insn "sqrtv4df2"
875
  [(set (match_operand:V4DF 0 "register_operand" "=x")
876
        (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
877
  "TARGET_AVX"
878
  "vsqrtpd\t{%1, %0|%0, %1}"
879
  [(set_attr "type" "sse")
880
   (set_attr "prefix" "vex")
881
   (set_attr "mode" "V4DF")])
882
 
883
;; V2DF square root of operand 1 (register or memory) for TARGET_SSE2.
;; The "%v" in the template together with prefix "maybe_vex" presumably
;; selects the VEX spelling under AVX -- confirm.
(define_insn "sqrtv2df2"
884
  [(set (match_operand:V2DF 0 "register_operand" "=x")
885
        (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
886
  "TARGET_SSE2"
887
  "%vsqrtpd\t{%1, %0|%0, %1}"
888
  [(set_attr "type" "sse")
889
   (set_attr "prefix" "maybe_vex")
890
   (set_attr "mode" "V2DF")])
891
 
892
;; Scalar square root, AVX three-operand form: the vec_merge mask of 1 takes
;; element 0 from (sqrt op1) and the other elements from operand 2, which
;; may be any SSE register.
;; NOTE(review): the name, the "(mode)" test, the mnemonic suffix and the
;; "mode" attribute look truncated (iterator <...> text missing?) -- confirm.
(define_insn "*avx_vmsqrt2"
893
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
894
        (vec_merge:SSEMODEF2P
895
          (sqrt:SSEMODEF2P
896
            (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
897
          (match_operand:SSEMODEF2P 2 "register_operand" "x")
898
          (const_int 1)))]
899
  "AVX_VEC_FLOAT_MODE_P (mode)"
900
  "vsqrts\t{%1, %2, %0|%0, %2, %1}"
901
  [(set_attr "type" "sse")
902
   (set_attr "prefix" "vex")
903
   (set_attr "mode" "")])
904
 
905
;; Scalar square root, SSE two-operand form: the vec_merge mask of 1 takes
;; element 0 from (sqrt op1) and the other elements from operand 2, which
;; is tied to the destination ("0").
;; NOTE(review): the name, the "(mode)" test, the mnemonic suffix and the
;; "mode" attribute look truncated (iterator <...> text missing?) -- confirm.
(define_insn "_vmsqrt2"
906
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
907
        (vec_merge:SSEMODEF2P
908
          (sqrt:SSEMODEF2P
909
            (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
910
          (match_operand:SSEMODEF2P 2 "register_operand" "0")
911
          (const_int 1)))]
912
  "SSE_VEC_FLOAT_MODE_P (mode)"
913
  "sqrts\t{%1, %0|%0, %1}"
914
  [(set_attr "type" "sse")
915
   (set_attr "atom_sse_attr" "sqrt")
916
   (set_attr "mode" "")])
917
 
918
;; Expander for V8SF reciprocal square root (TARGET_AVX && TARGET_SSE_MATH).
;; It always calls ix86_emit_swsqrtsf with last argument 1 and ends with
;; DONE, so the UNSPEC_RSQRT template below is never emitted by this expander.
(define_expand "rsqrtv8sf2"
919
  [(set (match_operand:V8SF 0 "register_operand" "")
920
        (unspec:V8SF
921
          [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
922
  "TARGET_AVX && TARGET_SSE_MATH"
923
{
924
  ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
925
  DONE;
926
})
927
 
928
;; V8SF reciprocal square root, represented as UNSPEC_RSQRT of operand 1
;; (register or memory).  Enabled for TARGET_AVX, emitted as "vrsqrtps".
(define_insn "avx_rsqrtv8sf2"
929
  [(set (match_operand:V8SF 0 "register_operand" "=x")
930
        (unspec:V8SF
931
          [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
932
  "TARGET_AVX"
933
  "vrsqrtps\t{%1, %0|%0, %1}"
934
  [(set_attr "type" "sse")
935
   (set_attr "prefix" "vex")
936
   (set_attr "mode" "V8SF")])
937
 
938
;; Expander for V4SF reciprocal square root (TARGET_SSE_MATH).  It always
;; calls ix86_emit_swsqrtsf with last argument 1 and ends with DONE, so the
;; UNSPEC_RSQRT template below is never emitted by this expander.
(define_expand "rsqrtv4sf2"
939
  [(set (match_operand:V4SF 0 "register_operand" "")
940
        (unspec:V4SF
941
          [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
942
  "TARGET_SSE_MATH"
943
{
944
  ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
945
  DONE;
946
})
947
 
948
;; V4SF reciprocal square root, represented as UNSPEC_RSQRT of operand 1
;; (register or memory).  Enabled for TARGET_SSE.  The "%v" in the template
;; with prefix "maybe_vex" presumably selects the VEX spelling under AVX
;; -- confirm.
(define_insn "sse_rsqrtv4sf2"
949
  [(set (match_operand:V4SF 0 "register_operand" "=x")
950
        (unspec:V4SF
951
          [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
952
  "TARGET_SSE"
953
  "%vrsqrtps\t{%1, %0|%0, %1}"
954
  [(set_attr "type" "sse")
955
   (set_attr "prefix" "maybe_vex")
956
   (set_attr "mode" "V4SF")])
957
 
958
;; Scalar reciprocal square root, AVX three-operand form: the vec_merge mask
;; of 1 takes element 0 from UNSPEC_RSQRT of operand 1 and the other elements
;; from operand 2, which may be any SSE register.  Emitted as "vrsqrtss".
(define_insn "*avx_vmrsqrtv4sf2"
959
  [(set (match_operand:V4SF 0 "register_operand" "=x")
960
        (vec_merge:V4SF
961
          (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
962
                       UNSPEC_RSQRT)
963
          (match_operand:V4SF 2 "register_operand" "x")
964
          (const_int 1)))]
965
  "TARGET_AVX"
966
  "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
967
  [(set_attr "type" "sse")
968
   (set_attr "prefix" "vex")
969
   (set_attr "mode" "SF")])
970
 
971
;; Scalar reciprocal square root, SSE two-operand form: the vec_merge mask of
;; 1 takes element 0 from UNSPEC_RSQRT of operand 1 and the other elements
;; from operand 2, which is tied to the destination ("0").  Emitted as
;; "rsqrtss".
(define_insn "sse_vmrsqrtv4sf2"
972
  [(set (match_operand:V4SF 0 "register_operand" "=x")
973
        (vec_merge:V4SF
974
          (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
975
                       UNSPEC_RSQRT)
976
          (match_operand:V4SF 2 "register_operand" "0")
977
          (const_int 1)))]
978
  "TARGET_SSE"
979
  "rsqrtss\t{%1, %0|%0, %1}"
980
  [(set_attr "type" "sse")
981
   (set_attr "mode" "SF")])
982
 
983
;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
984
;; isn't really correct, as those rtl operators aren't defined when
985
;; applied to NaNs.  Hopefully the optimizers won't get too smart on us.
986
 
987
;; Expander for 256-bit float min/max (smaxmin code iterator over
;; AVX256MODEF2P).  Unless flag_finite_math_only is set, operand 1 is first
;; forced into a register; the operands are then handed to
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the pattern name, the "mode" identifiers and the first
;; argument of the fixup call look truncated (iterator <...> text missing?)
;; -- confirm.
(define_expand "3"
988
  [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
989
        (smaxmin:AVX256MODEF2P
990
          (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
991
          (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
992
  "AVX256_VEC_FLOAT_MODE_P (mode)"
993
{
994
  if (!flag_finite_math_only)
995
    operands[1] = force_reg (mode, operands[1]);
996
  ix86_fixup_binary_operands_no_copy (, mode, operands);
997
})
998
 
999
;; Expander for 128-bit float min/max (smaxmin code iterator over
;; SSEMODEF2P).  Unless flag_finite_math_only is set, operand 1 is first
;; forced into a register; the operands are then handed to
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the pattern name, the "mode" identifiers and the first
;; argument of the fixup call look truncated (iterator <...> text missing?)
;; -- confirm.
(define_expand "3"
1000
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1001
        (smaxmin:SSEMODEF2P
1002
          (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1003
          (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1004
  "SSE_VEC_FLOAT_MODE_P (mode)"
1005
{
1006
  if (!flag_finite_math_only)
1007
    operands[1] = force_reg (mode, operands[1]);
1008
  ix86_fixup_binary_operands_no_copy (, mode, operands);
1009
})
1010
 
1011
;; AVX packed min/max used only when flag_finite_math_only is set.  Operand 1
;; carries the "%" modifier, i.e. operands 1 and 2 are treated as commutative;
;; the condition additionally requires ix86_binary_operator_ok.
;; NOTE(review): the name, mnemonic, "mode" identifiers and the first argument
;; of ix86_binary_operator_ok look truncated (iterator <...> text missing?)
;; -- confirm.
(define_insn "*avx_3_finite"
1012
  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1013
        (smaxmin:AVXMODEF2P
1014
          (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1015
          (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1016
  "AVX_VEC_FLOAT_MODE_P (mode) && flag_finite_math_only
1017
   && ix86_binary_operator_ok (, mode, operands)"
1018
  "vp\t{%2, %1, %0|%0, %1, %2}"
1019
  [(set_attr "type" "sseadd")
1020
   (set_attr "prefix" "vex")
1021
   (set_attr "mode" "")])
1022
 
1023
;; SSE packed min/max used only when flag_finite_math_only is set.  Operand 1
;; is tied to the destination and carries the "%" modifier, i.e. operands 1
;; and 2 are treated as commutative; the condition additionally requires
;; ix86_binary_operator_ok.
;; NOTE(review): the name, mnemonic, "mode" identifiers and the first argument
;; of ix86_binary_operator_ok look truncated (iterator <...> text missing?)
;; -- confirm.
(define_insn "*3_finite"
1024
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1025
        (smaxmin:SSEMODEF2P
1026
          (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1027
          (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1028
  "SSE_VEC_FLOAT_MODE_P (mode) && flag_finite_math_only
1029
   && ix86_binary_operator_ok (, mode, operands)"
1030
  "p\t{%2, %0|%0, %2}"
1031
  [(set_attr "type" "sseadd")
1032
   (set_attr "mode" "")])
1033
 
1034
;; AVX packed min/max for the general (not finite-math-only) case.
;; The hardware min/max instructions are not commutative when NaNs or signed
;; zeros are involved, so operand 1 must stay first: it is a plain register
;; operand with no "%" commutative modifier, matching the SSE "*3" pattern
;; and the expander, which forces operand 1 into a register when
;; flag_finite_math_only is clear.
;; NOTE(review): the name, mnemonic and "mode" identifiers look truncated
;; (iterator <...> text missing?) -- confirm.
(define_insn "*avx_3"
1035
  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1036
        (smaxmin:AVXMODEF2P
1037
          (match_operand:AVXMODEF2P 1 "register_operand" "x")
1038
          (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1039
  "AVX_VEC_FLOAT_MODE_P (mode)"
1040
  "vp\t{%2, %1, %0|%0, %1, %2}"
1041
  [(set_attr "type" "sseadd")
1042
   (set_attr "prefix" "vex")
1043
   (set_attr "mode" "")])
1044
 
1045
;; SSE packed min/max for the general (not finite-math-only) case.  Operand 1
;; is a register tied to the destination ("0") with no "%" modifier, so the
;; operand order is preserved.
;; NOTE(review): the name, mnemonic and "mode" identifiers look truncated
;; (iterator <...> text missing?) -- confirm.
(define_insn "*3"
1046
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1047
        (smaxmin:SSEMODEF2P
1048
          (match_operand:SSEMODEF2P 1 "register_operand" "0")
1049
          (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1050
  "SSE_VEC_FLOAT_MODE_P (mode)"
1051
  "p\t{%2, %0|%0, %2}"
1052
  [(set_attr "type" "sseadd")
1053
   (set_attr "mode" "")])
1054
 
1055
;; Scalar min/max, AVX three-operand form: the vec_merge mask of 1 takes
;; element 0 from (smaxmin op1 op2) and the other elements from operand 1.
;; NOTE(review): "type" is "sse" here while the SSE "_vm3" pattern uses
;; "sseadd" -- confirm this is intended.
;; NOTE(review): the name, mnemonic and "mode" identifiers look truncated
;; (iterator <...> text missing?) -- confirm.
(define_insn "*avx_vm3"
1056
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1057
        (vec_merge:SSEMODEF2P
1058
          (smaxmin:SSEMODEF2P
1059
            (match_operand:SSEMODEF2P 1 "register_operand" "x")
1060
            (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1061
         (match_dup 1)
1062
         (const_int 1)))]
1063
  "AVX128_VEC_FLOAT_MODE_P (mode)"
1064
  "vs\t{%2, %1, %0|%0, %1, %2}"
1065
  [(set_attr "type" "sse")
1066
   (set_attr "prefix" "vex")
1067
   (set_attr "mode" "")])
1068
 
1069
;; Scalar min/max, SSE two-operand form: the vec_merge mask of 1 takes
;; element 0 from (smaxmin op1 op2) and the other elements from operand 1,
;; which is tied to the destination ("0").
;; NOTE(review): the name, mnemonic and "mode" identifiers look truncated
;; (iterator <...> text missing?) -- confirm.
(define_insn "_vm3"
1070
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1071
        (vec_merge:SSEMODEF2P
1072
          (smaxmin:SSEMODEF2P
1073
            (match_operand:SSEMODEF2P 1 "register_operand" "0")
1074
            (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1075
         (match_dup 1)
1076
         (const_int 1)))]
1077
  "SSE_VEC_FLOAT_MODE_P (mode)"
1078
  "s\t{%2, %0|%0, %2}"
1079
  [(set_attr "type" "sseadd")
1080
   (set_attr "mode" "")])
1081
 
1082
;; These versions of the min/max patterns implement exactly the operations
1083
;;   min = (op1 < op2 ? op1 : op2)
1084
;;   max = (!(op1 < op2) ? op1 : op2)
1085
;; Their operands are not commutative, and thus they may be used in the
1086
;; presence of -0.0 and NaN.
1087
 
1088
;; AVX packed minimum expressed as UNSPEC_IEEE_MIN of (op1, op2) rather than
;; an smin RTL code; operand 1 is a register with no "%" modifier, so the
;; operand order is fixed.  Emitted with the "vminp" mnemonic stem.
;; NOTE(review): the name, mnemonic suffix and "mode" identifiers look
;; truncated (iterator <...> text missing?) -- confirm.
(define_insn "*avx_ieee_smin3"
1089
  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1090
        (unspec:AVXMODEF2P
1091
          [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1092
           (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1093
         UNSPEC_IEEE_MIN))]
1094
  "AVX_VEC_FLOAT_MODE_P (mode)"
1095
  "vminp\t{%2, %1, %0|%0, %1, %2}"
1096
  [(set_attr "type" "sseadd")
1097
   (set_attr "prefix" "vex")
1098
   (set_attr "mode" "")])
1099
 
1100
;; AVX packed maximum expressed as UNSPEC_IEEE_MAX of (op1, op2) rather than
;; an smax RTL code; operand 1 is a register with no "%" modifier, so the
;; operand order is fixed.  Emitted with the "vmaxp" mnemonic stem.
;; NOTE(review): the name, mnemonic suffix and "mode" identifiers look
;; truncated (iterator <...> text missing?) -- confirm.
(define_insn "*avx_ieee_smax3"
1101
  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1102
        (unspec:AVXMODEF2P
1103
          [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1104
           (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
1105
         UNSPEC_IEEE_MAX))]
1106
  "AVX_VEC_FLOAT_MODE_P (mode)"
1107
  "vmaxp\t{%2, %1, %0|%0, %1, %2}"
1108
  [(set_attr "type" "sseadd")
1109
   (set_attr "prefix" "vex")
1110
   (set_attr "mode" "")])
1111
 
1112
;; SSE packed minimum expressed as UNSPEC_IEEE_MIN of (op1, op2); operand 1
;; is a register tied to the destination ("0"), so the operand order is
;; fixed.  Emitted with the "minp" mnemonic stem.
;; NOTE(review): the name, mnemonic suffix and "mode" identifiers look
;; truncated (iterator <...> text missing?) -- confirm.
(define_insn "*ieee_smin3"
1113
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1114
        (unspec:SSEMODEF2P
1115
          [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1116
           (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1117
         UNSPEC_IEEE_MIN))]
1118
  "SSE_VEC_FLOAT_MODE_P (mode)"
1119
  "minp\t{%2, %0|%0, %2}"
1120
  [(set_attr "type" "sseadd")
1121
   (set_attr "mode" "")])
1122
 
1123
;; SSE packed maximum expressed as UNSPEC_IEEE_MAX of (op1, op2); operand 1
;; is a register tied to the destination ("0"), so the operand order is
;; fixed.  Emitted with the "maxp" mnemonic stem.
;; NOTE(review): the name, mnemonic suffix and "mode" identifiers look
;; truncated (iterator <...> text missing?) -- confirm.
(define_insn "*ieee_smax3"
1124
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1125
        (unspec:SSEMODEF2P
1126
          [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1127
           (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
1128
         UNSPEC_IEEE_MAX))]
1129
  "SSE_VEC_FLOAT_MODE_P (mode)"
1130
  "maxp\t{%2, %0|%0, %2}"
1131
  [(set_attr "type" "sseadd")
1132
   (set_attr "mode" "")])
1133
 
1134
;; V8SF alternating add/subtract (TARGET_AVX, "vaddsubps").  The vec_merge
;; mask 170 (binary 10101010) takes the odd-numbered elements from
;; (plus op1 op2) and the even-numbered elements from (minus op1 op2).
(define_insn "avx_addsubv8sf3"
1135
  [(set (match_operand:V8SF 0 "register_operand" "=x")
1136
        (vec_merge:V8SF
1137
          (plus:V8SF
1138
            (match_operand:V8SF 1 "register_operand" "x")
1139
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1140
          (minus:V8SF (match_dup 1) (match_dup 2))
1141
          (const_int 170)))]
1142
  "TARGET_AVX"
1143
  "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1144
  [(set_attr "type" "sseadd")
1145
   (set_attr "prefix" "vex")
1146
   (set_attr "mode" "V8SF")])
1147
 
1148
;; V4DF alternating add/subtract (TARGET_AVX, "vaddsubpd").  The vec_merge
;; mask 10 (binary 1010) takes elements 1 and 3 from (plus op1 op2) and
;; elements 0 and 2 from (minus op1 op2).
(define_insn "avx_addsubv4df3"
1149
  [(set (match_operand:V4DF 0 "register_operand" "=x")
1150
        (vec_merge:V4DF
1151
          (plus:V4DF
1152
            (match_operand:V4DF 1 "register_operand" "x")
1153
            (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1154
          (minus:V4DF (match_dup 1) (match_dup 2))
1155
          (const_int 10)))]
1156
  "TARGET_AVX"
1157
  "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1158
  [(set_attr "type" "sseadd")
1159
   (set_attr "prefix" "vex")
1160
   (set_attr "mode" "V4DF")])
1161
 
1162
;; V4SF alternating add/subtract, AVX three-operand form ("vaddsubps").  The
;; vec_merge mask 10 (binary 1010) takes elements 1 and 3 from
;; (plus op1 op2) and elements 0 and 2 from (minus op1 op2).
(define_insn "*avx_addsubv4sf3"
1163
  [(set (match_operand:V4SF 0 "register_operand" "=x")
1164
        (vec_merge:V4SF
1165
          (plus:V4SF
1166
            (match_operand:V4SF 1 "register_operand" "x")
1167
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1168
          (minus:V4SF (match_dup 1) (match_dup 2))
1169
          (const_int 10)))]
1170
  "TARGET_AVX"
1171
  "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1172
  [(set_attr "type" "sseadd")
1173
   (set_attr "prefix" "vex")
1174
   (set_attr "mode" "V4SF")])
1175
 
1176
;; V4SF alternating add/subtract, SSE3 two-operand form ("addsubps"); operand
;; 1 is tied to the destination ("0").  The vec_merge mask 10 (binary 1010)
;; takes elements 1 and 3 from (plus op1 op2) and elements 0 and 2 from
;; (minus op1 op2).
(define_insn "sse3_addsubv4sf3"
1177
  [(set (match_operand:V4SF 0 "register_operand" "=x")
1178
        (vec_merge:V4SF
1179
          (plus:V4SF
1180
            (match_operand:V4SF 1 "register_operand" "0")
1181
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1182
          (minus:V4SF (match_dup 1) (match_dup 2))
1183
          (const_int 10)))]
1184
  "TARGET_SSE3"
1185
  "addsubps\t{%2, %0|%0, %2}"
1186
  [(set_attr "type" "sseadd")
1187
   (set_attr "prefix_rep" "1")
1188
   (set_attr "mode" "V4SF")])
1189
 
1190
;; V2DF add/subtract, AVX three-operand form ("vaddsubpd").  The vec_merge
;; mask 2 takes element 1 from (plus op1 op2) and element 0 from
;; (minus op1 op2).
(define_insn "*avx_addsubv2df3"
1191
  [(set (match_operand:V2DF 0 "register_operand" "=x")
1192
        (vec_merge:V2DF
1193
          (plus:V2DF
1194
            (match_operand:V2DF 1 "register_operand" "x")
1195
            (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1196
          (minus:V2DF (match_dup 1) (match_dup 2))
1197
          (const_int 2)))]
1198
  "TARGET_AVX"
1199
  "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1200
  [(set_attr "type" "sseadd")
1201
   (set_attr "prefix" "vex")
1202
   (set_attr "mode" "V2DF")])
1203
 
1204
;; V2DF add/subtract, SSE3 two-operand form ("addsubpd"); operand 1 is tied
;; to the destination ("0").  The vec_merge mask 2 takes element 1 from
;; (plus op1 op2) and element 0 from (minus op1 op2).
(define_insn "sse3_addsubv2df3"
1205
  [(set (match_operand:V2DF 0 "register_operand" "=x")
1206
        (vec_merge:V2DF
1207
          (plus:V2DF
1208
            (match_operand:V2DF 1 "register_operand" "0")
1209
            (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1210
          (minus:V2DF (match_dup 1) (match_dup 2))
1211
          (const_int 2)))]
1212
  "TARGET_SSE3"
1213
  "addsubpd\t{%2, %0|%0, %2}"
1214
  [(set_attr "type" "sseadd")
1215
   (set_attr "atom_unit" "complex")
1216
   (set_attr "mode" "V2DF")])
1217
 
1218
;; V4DF horizontal add/subtract (plusminus code iterator, TARGET_AVX).
;; The 256-bit instruction operates independently on each 128-bit lane, so
;; with a = operand 1 and b = operand 2 the result elements are, in order:
;;   a[0] op a[1],  b[0] op b[1],  a[2] op a[3],  b[2] op b[3]
;; The RTL below is laid out to describe exactly that per-lane result (the
;; same lane-wise arrangement the V8SF horizontal pattern uses).
;; NOTE(review): the pattern name and the mnemonic look truncated (iterator
;; <...> text missing?) -- confirm.
(define_insn "avx_hv4df3"
1219
  [(set (match_operand:V4DF 0 "register_operand" "=x")
1220
        (vec_concat:V4DF
1221
          (vec_concat:V2DF
1222
            (plusminus:DF
1223
              (vec_select:DF
1224
                (match_operand:V4DF 1 "register_operand" "x")
1225
                (parallel [(const_int 0)]))
1226
              (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1227
            (plusminus:DF
1228
              (vec_select:DF
1229
                (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1230
                (parallel [(const_int 0)]))
1231
              (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
1232
          (vec_concat:V2DF
1233
            (plusminus:DF
1234
              (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1235
              (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
1236
            (plusminus:DF
1237
              (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1238
              (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1239
  "TARGET_AVX"
1240
  "vhpd\t{%2, %1, %0|%0, %1, %2}"
1241
  [(set_attr "type" "sseadd")
1242
   (set_attr "prefix" "vex")
1243
   (set_attr "mode" "V4DF")])
1244
 
1245
;; V8SF horizontal add/subtract (plusminus code iterator, TARGET_AVX).
;; With a = operand 1 and b = operand 2 the result elements are, in order:
;;   a[0] op a[1], a[2] op a[3], b[0] op b[1], b[2] op b[3],
;;   a[4] op a[5], a[6] op a[7], b[4] op b[5], b[6] op b[7]
;; NOTE(review): the pattern name and the mnemonic look truncated (iterator
;; <...> text missing?) -- confirm.
(define_insn "avx_hv8sf3"
1246
  [(set (match_operand:V8SF 0 "register_operand" "=x")
1247
        (vec_concat:V8SF
1248
          (vec_concat:V4SF
1249
            (vec_concat:V2SF
1250
              (plusminus:SF
1251
                (vec_select:SF
1252
                  (match_operand:V8SF 1 "register_operand" "x")
1253
                  (parallel [(const_int 0)]))
1254
                (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1255
              (plusminus:SF
1256
                (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1257
                (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1258
            (vec_concat:V2SF
1259
              (plusminus:SF
1260
                (vec_select:SF
1261
                  (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1262
                  (parallel [(const_int 0)]))
1263
                (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1264
              (plusminus:SF
1265
                (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1266
                (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1267
          (vec_concat:V4SF
1268
            (vec_concat:V2SF
1269
              (plusminus:SF
1270
                (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1271
                (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1272
              (plusminus:SF
1273
                (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1274
                (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1275
            (vec_concat:V2SF
1276
              (plusminus:SF
1277
                (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1278
                (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1279
              (plusminus:SF
1280
                (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1281
                (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1282
  "TARGET_AVX"
1283
  "vhps\t{%2, %1, %0|%0, %1, %2}"
1284
  [(set_attr "type" "sseadd")
1285
   (set_attr "prefix" "vex")
1286
   (set_attr "mode" "V8SF")])
1287
 
1288
;; V4SF horizontal add/subtract, AVX three-operand form (plusminus iterator).
;; With a = operand 1 and b = operand 2 the result elements are, in order:
;;   a[0] op a[1], a[2] op a[3], b[0] op b[1], b[2] op b[3]
;; NOTE(review): the pattern name and the mnemonic look truncated (iterator
;; <...> text missing?) -- confirm.
(define_insn "*avx_hv4sf3"
1289
  [(set (match_operand:V4SF 0 "register_operand" "=x")
1290
        (vec_concat:V4SF
1291
          (vec_concat:V2SF
1292
            (plusminus:SF
1293
              (vec_select:SF
1294
                (match_operand:V4SF 1 "register_operand" "x")
1295
                (parallel [(const_int 0)]))
1296
              (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1297
            (plusminus:SF
1298
              (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1299
              (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1300
          (vec_concat:V2SF
1301
            (plusminus:SF
1302
              (vec_select:SF
1303
                (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1304
                (parallel [(const_int 0)]))
1305
              (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1306
            (plusminus:SF
1307
              (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1308
              (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1309
  "TARGET_AVX"
1310
  "vhps\t{%2, %1, %0|%0, %1, %2}"
1311
  [(set_attr "type" "sseadd")
1312
   (set_attr "prefix" "vex")
1313
   (set_attr "mode" "V4SF")])
1314
 
1315
;; V4SF horizontal add/subtract, SSE3 two-operand form (plusminus iterator);
;; operand 1 is tied to the destination ("0").  With a = operand 1 and
;; b = operand 2 the result elements are, in order:
;;   a[0] op a[1], a[2] op a[3], b[0] op b[1], b[2] op b[3]
;; NOTE(review): the pattern name and the mnemonic look truncated (iterator
;; <...> text missing?) -- confirm.
(define_insn "sse3_hv4sf3"
1316
  [(set (match_operand:V4SF 0 "register_operand" "=x")
1317
        (vec_concat:V4SF
1318
          (vec_concat:V2SF
1319
            (plusminus:SF
1320
              (vec_select:SF
1321
                (match_operand:V4SF 1 "register_operand" "0")
1322
                (parallel [(const_int 0)]))
1323
              (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1324
            (plusminus:SF
1325
              (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1326
              (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1327
          (vec_concat:V2SF
1328
            (plusminus:SF
1329
              (vec_select:SF
1330
                (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1331
                (parallel [(const_int 0)]))
1332
              (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1333
            (plusminus:SF
1334
              (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1335
              (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1336
  "TARGET_SSE3"
1337
  "hps\t{%2, %0|%0, %2}"
1338
  [(set_attr "type" "sseadd")
1339
   (set_attr "atom_unit" "complex")
1340
   (set_attr "prefix_rep" "1")
1341
   (set_attr "mode" "V4SF")])
1342
 
1343
;; V2DF horizontal add/subtract, AVX three-operand form (plusminus iterator).
;; With a = operand 1 and b = operand 2 the result is
;;   (a[0] op a[1], b[0] op b[1]).
;; NOTE(review): the pattern name and the mnemonic look truncated (iterator
;; <...> text missing?) -- confirm.
(define_insn "*avx_hv2df3"
1344
  [(set (match_operand:V2DF 0 "register_operand" "=x")
1345
        (vec_concat:V2DF
1346
          (plusminus:DF
1347
            (vec_select:DF
1348
              (match_operand:V2DF 1 "register_operand" "x")
1349
              (parallel [(const_int 0)]))
1350
            (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1351
          (plusminus:DF
1352
            (vec_select:DF
1353
              (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1354
              (parallel [(const_int 0)]))
1355
            (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1356
  "TARGET_AVX"
1357
  "vhpd\t{%2, %1, %0|%0, %1, %2}"
1358
  [(set_attr "type" "sseadd")
1359
   (set_attr "prefix" "vex")
1360
   (set_attr "mode" "V2DF")])
1361
 
1362
;; V2DF horizontal add/subtract, SSE3 two-operand form (plusminus iterator);
;; operand 1 is tied to the destination ("0").  With a = operand 1 and
;; b = operand 2 the result is (a[0] op a[1], b[0] op b[1]).
;; NOTE(review): the pattern name and the mnemonic look truncated (iterator
;; <...> text missing?) -- confirm.
(define_insn "sse3_hv2df3"
1363
  [(set (match_operand:V2DF 0 "register_operand" "=x")
1364
        (vec_concat:V2DF
1365
          (plusminus:DF
1366
            (vec_select:DF
1367
              (match_operand:V2DF 1 "register_operand" "0")
1368
              (parallel [(const_int 0)]))
1369
            (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1370
          (plusminus:DF
1371
            (vec_select:DF
1372
              (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1373
              (parallel [(const_int 0)]))
1374
            (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1375
  "TARGET_SSE3"
1376
  "hpd\t{%2, %0|%0, %2}"
1377
  [(set_attr "type" "sseadd")
1378
   (set_attr "mode" "V2DF")])
1379
 
1380
;; Expander for the V4SF sum reduction (TARGET_SSE).  With TARGET_SSE3 it
;; emits two gen_sse3_haddv4sf3 insns: operand 1 with itself into a fresh
;; temporary, then the temporary with itself into operand 0.  Otherwise it
;; calls ix86_expand_reduc_v4sf with gen_addv4sf3.  Always ends with DONE.
(define_expand "reduc_splus_v4sf"
1381
  [(match_operand:V4SF 0 "register_operand" "")
1382
   (match_operand:V4SF 1 "register_operand" "")]
1383
  "TARGET_SSE"
1384
{
1385
  if (TARGET_SSE3)
1386
    {
1387
      rtx tmp = gen_reg_rtx (V4SFmode);
1388
      emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1389
      emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1390
    }
1391
  else
1392
    ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
1393
  DONE;
1394
})
1395
 
1396
;; Expander for the V2DF sum reduction (TARGET_SSE3): emits a single
;; gen_sse3_haddv2df3 of operand 1 with itself into operand 0, then DONE.
(define_expand "reduc_splus_v2df"
1397
  [(match_operand:V2DF 0 "register_operand" "")
1398
   (match_operand:V2DF 1 "register_operand" "")]
1399
  "TARGET_SSE3"
1400
{
1401
  emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1402
  DONE;
1403
})
1404
 
1405
;; Expander for the V4SF maximum reduction (TARGET_SSE): delegates to
;; ix86_expand_reduc_v4sf with gen_smaxv4sf3 as the combining insn, then DONE.
(define_expand "reduc_smax_v4sf"
1406
  [(match_operand:V4SF 0 "register_operand" "")
1407
   (match_operand:V4SF 1 "register_operand" "")]
1408
  "TARGET_SSE"
1409
{
1410
  ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
1411
  DONE;
1412
})
1413
 
1414
;; Expander for the V4SF minimum reduction (TARGET_SSE): delegates to
;; ix86_expand_reduc_v4sf with gen_sminv4sf3 as the combining insn, then DONE.
(define_expand "reduc_smin_v4sf"
1415
  [(match_operand:V4SF 0 "register_operand" "")
1416
   (match_operand:V4SF 1 "register_operand" "")]
1417
  "TARGET_SSE"
1418
{
1419
  ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1420
  DONE;
1421
})
1422
 
1423
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1424
;;
1425
;; Parallel floating point comparisons
1426
;;
1427
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1428
 
1429
;; AVX packed float compare with an explicit predicate: UNSPEC_PCMP of
;; operand 1 (register), operand 2 (register or memory) and operand 3, an
;; immediate accepted by const_0_to_31_operand.  The immediate is printed as
;; part of the instruction and accounted for by length_immediate "1".
;; NOTE(review): the name, mnemonic suffix and "mode" attribute look
;; truncated (iterator <...> text missing?) -- confirm.
(define_insn "avx_cmpp3"
1430
  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1431
        (unspec:AVXMODEF2P
1432
          [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1433
           (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
1434
           (match_operand:SI 3 "const_0_to_31_operand" "n")]
1435
          UNSPEC_PCMP))]
1436
  "TARGET_AVX"
1437
  "vcmpp\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1438
  [(set_attr "type" "ssecmp")
1439
   (set_attr "length_immediate" "1")
1440
   (set_attr "prefix" "vex")
1441
   (set_attr "mode" "")])
1442
 
1443
;; AVX scalar float compare with an explicit predicate: the vec_merge mask of
;; 1 takes element 0 from UNSPEC_PCMP of (op1, op2, immediate op3) and the
;; other elements from operand 1.  Operand 3 must satisfy
;; const_0_to_31_operand.
;; The destination carries the "=x" output constraint like every other insn
;; in this group, so register allocation sees it as an SSE-register output.
;; NOTE(review): the name, mnemonic suffix and "mode" attribute look
;; truncated (iterator <...> text missing?) -- confirm.
(define_insn "avx_cmps3"
1444
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1445
        (vec_merge:SSEMODEF2P
1446
          (unspec:SSEMODEF2P
1447
            [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1448
             (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
1449
             (match_operand:SI 3 "const_0_to_31_operand" "n")]
1450
            UNSPEC_PCMP)
1451
         (match_dup 1)
1452
         (const_int 1)))]
1453
  "TARGET_AVX"
1454
  "vcmps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1455
  [(set_attr "type" "ssecmp")
1456
   (set_attr "length_immediate" "1")
1457
   (set_attr "prefix" "vex")
1458
   (set_attr "mode" "")])
1459
 
1460
;; We don't promote 128-bit vector compare intrinsics, but the vectorizer
1461
;; may generate 256-bit vector compare instructions.
1462
;; AVX packed float compare where the comparison itself is operand 3, matched
;; by the avx_comparison_float_operator predicate, applied to operand 1
;; (register) and operand 2 (register or memory).  "%D3" in the template
;; presumably prints the predicate name derived from operator 3 -- confirm.
;; NOTE(review): the name, mnemonic suffix and "mode" identifiers look
;; truncated (iterator <...> text missing?) -- confirm.
(define_insn "*avx_maskcmp3"
1463
  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1464
        (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
1465
                [(match_operand:AVXMODEF2P 1 "register_operand" "x")
1466
                 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1467
  "AVX_VEC_FLOAT_MODE_P (mode)"
1468
  "vcmp%D3p\t{%2, %1, %0|%0, %1, %2}"
1469
  [(set_attr "type" "ssecmp")
1470
   (set_attr "prefix" "vex")
1471
   (set_attr "length_immediate" "1")
1472
   (set_attr "mode" "")])
1473
 
1474
;; SSE float compare (iterator SSEMODEF4) where the comparison is operand 3,
;; matched by sse_comparison_operator; operand 1 is tied to the destination
;; ("0").  Only enabled when TARGET_XOP is off and the mode passes either
;; SSE_FLOAT_MODE_P or SSE_VEC_FLOAT_MODE_P.  "%D3" presumably prints the
;; predicate name derived from operator 3 -- confirm.
;; NOTE(review): the name, mnemonic suffix and "mode" identifiers look
;; truncated (iterator <...> text missing?) -- confirm.
(define_insn "_maskcmp3"
1475
  [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1476
        (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1477
                [(match_operand:SSEMODEF4 1 "register_operand" "0")
1478
                 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1479
  "!TARGET_XOP
1480
  && (SSE_FLOAT_MODE_P (mode) || SSE_VEC_FLOAT_MODE_P (mode))"
1481
  "cmp%D3\t{%2, %0|%0, %2}"
1482
  [(set_attr "type" "ssecmp")
1483
   (set_attr "length_immediate" "1")
1484
   (set_attr "mode" "")])
1485
 
1486
;; AVX scalar float compare where the comparison is operand 3 (matched by
;; sse_comparison_operator): the vec_merge mask of 1 takes element 0 from the
;; comparison of operands 1 and 2 and the other elements from operand 1.
;; "%D3" presumably prints the predicate name derived from operator 3
;; -- confirm.
;; The compare instruction carries a one-byte immediate predicate, so
;; length_immediate is set to "1" as in the packed AVX and scalar SSE
;; compare patterns.
;; NOTE(review): the name, mnemonic suffix and "mode" identifiers look
;; truncated (iterator <...> text missing?) -- confirm.
(define_insn "*avx_vmmaskcmp3"
1487
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1488
        (vec_merge:SSEMODEF2P
1489
         (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1490
                [(match_operand:SSEMODEF2P 1 "register_operand" "x")
1491
                 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1492
         (match_dup 1)
1493
         (const_int 1)))]
1494
  "AVX_VEC_FLOAT_MODE_P (mode)"
1495
  "vcmp%D3s\t{%2, %1, %0|%0, %1, %2}"
1496
  [(set_attr "type" "ssecmp")
1497
   (set_attr "prefix" "vex")
   (set_attr "length_immediate" "1")
1498
   (set_attr "mode" "")])
1499
 
1500
;; SSE scalar float compare where the comparison is operand 3 (matched by
;; sse_comparison_operator): the vec_merge mask of 1 takes element 0 from the
;; comparison of operands 1 and 2 and the other elements from operand 1,
;; which is tied to the destination ("0").  "%D3" presumably prints the
;; predicate name derived from operator 3 -- confirm.
;; NOTE(review): the name, mnemonic suffix and "mode" identifiers look
;; truncated (iterator <...> text missing?) -- confirm.
(define_insn "_vmmaskcmp3"
1501
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1502
        (vec_merge:SSEMODEF2P
1503
         (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1504
                [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1505
                 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1506
         (match_dup 1)
1507
         (const_int 1)))]
1508
  "SSE_VEC_FLOAT_MODE_P (mode)"
1509
  "cmp%D3s\t{%2, %0|%0, %2}"
1510
  [(set_attr "type" "ssecmp")
1511
   (set_attr "length_immediate" "1")
1512
   (set_attr "mode" "")])
1513
 
1514
;; Scalar compare that sets FLAGS_REG in CCFP mode: element 0 of operand 0
;; (register) is compared with element 0 of operand 1 (register or memory).
;; prefix_rep is forced to "0"; prefix_data16 is "1" when the insn's "mode"
;; attribute is DF and "0" otherwise.
;; NOTE(review): the name, the operand modes after "match_operand:", the
;; mnemonic suffix and the "mode" attribute look truncated (iterator <...>
;; text missing?) -- confirm.
(define_insn "_comi"
1515
  [(set (reg:CCFP FLAGS_REG)
1516
        (compare:CCFP
1517
          (vec_select:MODEF
1518
            (match_operand: 0 "register_operand" "x")
1519
            (parallel [(const_int 0)]))
1520
          (vec_select:MODEF
1521
            (match_operand: 1 "nonimmediate_operand" "xm")
1522
            (parallel [(const_int 0)]))))]
1523
  "SSE_FLOAT_MODE_P (mode)"
1524
  "%vcomis\t{%1, %0|%0, %1}"
1525
  [(set_attr "type" "ssecomi")
1526
   (set_attr "prefix" "maybe_vex")
1527
   (set_attr "prefix_rep" "0")
1528
   (set (attr "prefix_data16")
1529
        (if_then_else (eq_attr "mode" "DF")
1530
                      (const_string "1")
1531
                      (const_string "0")))
1532
   (set_attr "mode" "")])
1533
 
1534
;; Scalar compare that sets FLAGS_REG in CCFPU mode: element 0 of operand 0
;; (register) is compared with element 0 of operand 1 (register or memory).
;; prefix_rep is forced to "0"; prefix_data16 is "1" when the insn's "mode"
;; attribute is DF and "0" otherwise.
;; NOTE(review): the name, the operand modes after "match_operand:", the
;; mnemonic suffix and the "mode" attribute look truncated (iterator <...>
;; text missing?) -- confirm.
(define_insn "_ucomi"
1535
  [(set (reg:CCFPU FLAGS_REG)
1536
        (compare:CCFPU
1537
          (vec_select:MODEF
1538
            (match_operand: 0 "register_operand" "x")
1539
            (parallel [(const_int 0)]))
1540
          (vec_select:MODEF
1541
            (match_operand: 1 "nonimmediate_operand" "xm")
1542
            (parallel [(const_int 0)]))))]
1543
  "SSE_FLOAT_MODE_P (mode)"
1544
  "%vucomis\t{%1, %0|%0, %1}"
1545
  [(set_attr "type" "ssecomi")
1546
   (set_attr "prefix" "maybe_vex")
1547
   (set_attr "prefix_rep" "0")
1548
   (set (attr "prefix_data16")
1549
        (if_then_else (eq_attr "mode" "DF")
1550
                      (const_string "1")
1551
                      (const_string "0")))
1552
   (set_attr "mode" "")])
1553
 
1554
;; Expander for a float vector conditional select: operand 0 receives
;; operand 1 or operand 2 depending on comparison operator 3 applied to
;; operands 4 and 5.  All the work is delegated to ix86_expand_fp_vcond; the
;; expander asserts that it succeeded and ends with DONE, so the
;; if_then_else template itself is never emitted.
;; NOTE(review): the pattern name and the "(mode)" test look truncated
;; (iterator <...> text missing?) -- confirm.
(define_expand "vcond"
1555
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1556
        (if_then_else:SSEMODEF2P
1557
          (match_operator 3 ""
1558
            [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
1559
             (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
1560
          (match_operand:SSEMODEF2P 1 "general_operand" "")
1561
          (match_operand:SSEMODEF2P 2 "general_operand" "")))]
1562
  "SSE_VEC_FLOAT_MODE_P (mode)"
1563
{
1564
  bool ok = ix86_expand_fp_vcond (operands);
1565
  gcc_assert (ok);
1566
  DONE;
1567
})
1568
 
1569
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1570
;;
1571
;; Parallel floating point logical operations
1572
;;
1573
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1574
 
1575
;; AVX float and-not: computes (not op1) AND op2, with operand 1 in a
;; register and operand 2 in a register or memory.  Emitted with the
;; "vandnp" mnemonic stem.
;; NOTE(review): the name, mnemonic suffix and "mode" identifiers look
;; truncated (iterator <...> text missing?) -- confirm.
(define_insn "avx_andnot3"
1576
  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1577
        (and:AVXMODEF2P
1578
          (not:AVXMODEF2P
1579
            (match_operand:AVXMODEF2P 1 "register_operand" "x"))
1580
          (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1581
  "AVX_VEC_FLOAT_MODE_P (mode)"
1582
  "vandnp\t{%2, %1, %0|%0, %1, %2}"
1583
  [(set_attr "type" "sselog")
1584
   (set_attr "prefix" "vex")
1585
   (set_attr "mode" "")])
1586
 
1587
;; SSE float and-not: computes (not op1) AND op2, with operand 1 tied to the
;; destination ("0") and operand 2 in a register or memory.  Emitted with
;; the "andnp" mnemonic stem.
;; NOTE(review): the name, mnemonic suffix and "mode" identifiers look
;; truncated (iterator <...> text missing?) -- confirm.
(define_insn "_andnot3"
1588
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1589
        (and:SSEMODEF2P
1590
          (not:SSEMODEF2P
1591
            (match_operand:SSEMODEF2P 1 "register_operand" "0"))
1592
          (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1593
  "SSE_VEC_FLOAT_MODE_P (mode)"
1594
  "andnp\t{%2, %0|%0, %2}"
1595
  [(set_attr "type" "sselog")
1596
   (set_attr "mode" "")])
1597
 
1598
;; Expander for 256-bit float logical operations (any_logic code iterator
;; over AVX256MODEF2P).  Its only preparation step is a call to
;; ix86_fixup_binary_operands_no_copy on the operands.
;; NOTE(review): the pattern name, the "mode" identifiers and the first
;; argument of the fixup call look truncated (iterator <...> text missing?)
;; -- confirm.
(define_expand "3"
1599
  [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1600
        (any_logic:AVX256MODEF2P
1601
          (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1602
          (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1603
  "AVX256_VEC_FLOAT_MODE_P (mode)"
1604
  "ix86_fixup_binary_operands_no_copy (, mode, operands);")
1605
 
1606
;; AVX vector logic insn for the any_logic codes.  Operands 1 and 2 are
;; commutative ("%"); three-operand VEX form with an optional memory
;; operand 2.
;; NOTE(review): <...> substitutions restored by hand; verify against
;; upstream sse.md.
(define_insn "*avx_<code><mode>3"
  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
        (any_logic:AVXMODEF2P
          (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
          (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
  "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "v<logicprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<avxvecmode>")])
1617
 
1618
;; Expander for the any_logic operations on SSEMODEF2P vectors.
;; ix86_fixup_binary_operands_no_copy adjusts the operands before the
;; RTL template is emitted.
;; NOTE(review): <...> substitutions restored by hand; verify against
;; upstream sse.md.
(define_expand "<code><mode>3"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
        (any_logic:SSEMODEF2P
          (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
          (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1625
 
1626
;; SSE vector logic insn for the any_logic codes.  Two-operand form:
;; operand 1 is tied to the destination and is commutative with
;; operand 2 ("%0").
;; NOTE(review): <...> substitutions restored by hand; verify against
;; upstream sse.md.
(define_insn "*<code><mode>3"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
        (any_logic:SSEMODEF2P
          (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "<logicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "<MODE>")])
1636
 
1637
;; Vector copysign, expanded as three logic operations using mask
;; operand 3 (built by ix86_build_signbit_mask):
;;   operand 4 = ~mask & operand 1
;;   operand 5 =  mask & operand 2
;;   operand 0 = operand 4 | operand 5
;; Operands 4 and 5 are fresh pseudo registers.
;; NOTE(review): <...> substitutions restored by hand (the mask is built
;; from the scalar element mode); verify against upstream sse.md.
(define_expand "copysign<mode>3"
  [(set (match_dup 4)
        (and:SSEMODEF2P
          (not:SSEMODEF2P (match_dup 3))
          (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")))
   (set (match_dup 5)
        (and:SSEMODEF2P (match_dup 3)
                        (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))
   (set (match_operand:SSEMODEF2P 0 "register_operand" "")
        (ior:SSEMODEF2P (match_dup 4) (match_dup 5)))]
  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
{
  operands[3] = ix86_build_signbit_mask (<ssescalarmode>mode, 1, 0);

  operands[4] = gen_reg_rtx (<MODE>mode);
  operands[5] = gen_reg_rtx (<MODE>mode);
})
1654
 
1655
;; Also define scalar versions.  These are used for abs, neg, and
;; conditional move.  Using subregs into vector modes causes register
;; allocation lossage.  These patterns do not allow memory operands
;; because the native instructions read the full 128 bits.
1659
 
1660
;; Scalar (MODEF) and-not for AVX: operand 0 = (~operand 1) & operand 2.
;; Register operands only.
;; NOTE(review): <...> substitutions restored by hand; verify against
;; upstream sse.md.
(define_insn "*avx_andnot<mode>3"
  [(set (match_operand:MODEF 0 "register_operand" "=x")
        (and:MODEF
          (not:MODEF
            (match_operand:MODEF 1 "register_operand" "x"))
          (match_operand:MODEF 2 "register_operand" "x")))]
  "AVX_FLOAT_MODE_P (<MODE>mode)"
  "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<ssevecmode>")])
1671
 
1672
;; Scalar (MODEF) and-not for SSE: operand 0 = (~operand 1) & operand 2.
;; Register operands only; operand 1 is tied to the destination.
;; NOTE(review): <...> substitutions restored by hand; verify against
;; upstream sse.md.
(define_insn "*andnot<mode>3"
  [(set (match_operand:MODEF 0 "register_operand" "=x")
        (and:MODEF
          (not:MODEF
            (match_operand:MODEF 1 "register_operand" "0"))
          (match_operand:MODEF 2 "register_operand" "x")))]
  "SSE_FLOAT_MODE_P (<MODE>mode)"
  "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "<ssevecmode>")])
1682
 
1683
;; Scalar (MODEF) any_logic operation for AVX, register operands only,
;; three-operand VEX form.
;; NOTE(review): <...> substitutions restored by hand; verify against
;; upstream sse.md.
(define_insn "*avx_<code><mode>3"
  [(set (match_operand:MODEF 0 "register_operand" "=x")
        (any_logic:MODEF
          (match_operand:MODEF 1 "register_operand" "x")
          (match_operand:MODEF 2 "register_operand" "x")))]
  "AVX_FLOAT_MODE_P (<MODE>mode)"
  "v<logicprefix>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<ssevecmode>")])
1693
 
1694
;; Scalar (MODEF) any_logic operation for SSE, register operands only;
;; operand 1 is tied to the destination.
;; NOTE(review): <...> substitutions restored by hand; verify against
;; upstream sse.md.
(define_insn "*<code><mode>3"
  [(set (match_operand:MODEF 0 "register_operand" "=x")
        (any_logic:MODEF
          (match_operand:MODEF 1 "register_operand" "0")
          (match_operand:MODEF 2 "register_operand" "x")))]
  "SSE_FLOAT_MODE_P (<MODE>mode)"
  "<logicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "<ssevecmode>")])
1703
 
1704
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1705
;;
1706
;; FMA4 floating point multiply/accumulate instructions.  This
1707
;; includes the scalar version of the instructions as well as the
1708
;; vector.
1709
;;
1710
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1711
 
1712
;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
;; combine to generate a multiply/add with two memory references.  We then
;; split this insn into loading up the destination register with one of the
;; memory operations.  If we don't manage to split the insn, reload will
;; generate the appropriate moves.  This is needed because combine has
;; already folded one of the memory references into both the multiply and
;; add insns, and it can't generate a new pseudo.  I.e.:
;;      (set (reg1) (mem (addr1)))
;;      (set (reg2) (mult (reg1) (mem (addr2))))
;;      (set (reg3) (plus (reg2) (mem (addr3))))
1722
 
1723
;; Floating multiply and add on FMA4MODEF4 vectors:
;; operand 0 = (operand 1 * operand 2) + operand 3.
;; The two alternatives allow memory in operand 3 or in operand 2.
;; NOTE(review): <...> substitutions restored by hand; verify against
;; upstream sse.md.
(define_insn "fma4_fmadd<mode>4256"
  [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
        (plus:FMA4MODEF4
         (mult:FMA4MODEF4
          (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
          (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
         (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
  "TARGET_FMA4 && TARGET_FUSED_MADD"
  "vfmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
1734
 
1735
;; Floating multiply and subtract on FMA4MODEF4 vectors:
;; operand 0 = (operand 1 * operand 2) - operand 3.
;; NOTE(review): <...> substitutions restored by hand; verify against
;; upstream sse.md.
(define_insn "fma4_fmsub<mode>4256"
  [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
        (minus:FMA4MODEF4
         (mult:FMA4MODEF4
          (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
          (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
         (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
  "TARGET_FMA4 && TARGET_FUSED_MADD"
  "vfmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
1747
 
1748
;; Floating point negative multiply and add on FMA4MODEF4 vectors.
;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b), i.e.
;; operand 0 = operand 3 - (operand 1 * operand 2).
;; NOTE(review): <...> substitutions restored by hand; verify against
;; upstream sse.md.
(define_insn "fma4_fnmadd<mode>4256"
  [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
        (minus:FMA4MODEF4
         (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
         (mult:FMA4MODEF4
          (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
          (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))))]
  "TARGET_FMA4 && TARGET_FUSED_MADD"
  "vfnmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
1761
 
1762
;; Floating point negative multiply and subtract on FMA4MODEF4 vectors:
;; operand 0 = ((- operand 1) * operand 2) - operand 3.
;; NOTE(review): <...> substitutions restored by hand; verify against
;; upstream sse.md.
(define_insn "fma4_fnmsub<mode>4256"
  [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
        (minus:FMA4MODEF4
         (mult:FMA4MODEF4
          (neg:FMA4MODEF4
           (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x"))
          (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
         (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
  "TARGET_FMA4 && TARGET_FUSED_MADD"
  "vfnmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
1775
 
1776
;; Floating multiply and add over the SSEMODEF4 modes:
;; operand 0 = (operand 1 * operand 2) + operand 3.
;; The two alternatives allow memory in operand 3 or in operand 2.
;; NOTE(review): <...> substitutions restored by hand; verify against
;; upstream sse.md.
(define_insn "fma4_fmadd<mode>4"
  [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
        (plus:SSEMODEF4
         (mult:SSEMODEF4
          (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
          (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
         (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
  "TARGET_FMA4 && TARGET_FUSED_MADD"
  "vfmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
1787
 
1788
;; Scalar version of fmadd, expressed on a vector mode.  The vec_merge
;; with mask 1 takes element 0 from (operand 1 * operand 2) + operand 3
;; and every other element from operand 0 (match_dup 0), so only the
;; restricted forms below are generated.
;; NOTE(review): <...> substitutions restored by hand; verify against
;; upstream sse.md.
(define_insn "fma4_vmfmadd<mode>4"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
        (vec_merge:SSEMODEF2P
         (plus:SSEMODEF2P
          (mult:SSEMODEF2P
           (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
           (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
          (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
         (match_dup 0)
         (const_int 1)))]
  "TARGET_FMA4 && TARGET_FUSED_MADD"
  "vfmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
1805
 
1806
;; Floating multiply and subtract over the SSEMODEF4 modes:
;; operand 0 = (operand 1 * operand 2) - operand 3.
;; Allow two memory operands the same as fmadd.
;; NOTE(review): <...> substitutions restored by hand; verify against
;; upstream sse.md.
(define_insn "fma4_fmsub<mode>4"
  [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
        (minus:SSEMODEF4
         (mult:SSEMODEF4
          (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
          (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
         (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
  "TARGET_FMA4 && TARGET_FUSED_MADD"
  "vfmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
1819
 
1820
;; Scalar version of fmsub, expressed on a vector mode.  The vec_merge
;; with mask 1 takes element 0 from (operand 1 * operand 2) - operand 3
;; and every other element from operand 0 (match_dup 0), so only the
;; restricted forms below are generated.
;; NOTE(review): <...> substitutions restored by hand; verify against
;; upstream sse.md.
(define_insn "fma4_vmfmsub<mode>4"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
        (vec_merge:SSEMODEF2P
         (minus:SSEMODEF2P
          (mult:SSEMODEF2P
           (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
           (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
          (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
         (match_dup 0)
         (const_int 1)))]
  "TARGET_FMA4 && TARGET_FUSED_MADD"
  "vfmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
1837
 
1838
;; Floating point negative multiply and add over the SSEMODEF4 modes.
;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b), i.e.
;; operand 0 = operand 3 - (operand 1 * operand 2).
;; NOTE(review): <...> substitutions restored by hand; verify against
;; upstream sse.md.
(define_insn "fma4_fnmadd<mode>4"
  [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
        (minus:SSEMODEF4
         (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")
         (mult:SSEMODEF4
          (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
          (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))))]
  "TARGET_FMA4 && TARGET_FUSED_MADD"
  "vfnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
1851
 
1852
;; Scalar version of fnmadd, expressed on a vector mode.  The vec_merge
;; with mask 1 takes element 0 from operand 3 - (operand 1 * operand 2)
;; and every other element from operand 0 (match_dup 0), so only the
;; restricted forms below are generated.
;; NOTE(review): <...> substitutions restored by hand; verify against
;; upstream sse.md.
(define_insn "fma4_vmfnmadd<mode>4"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
        (vec_merge:SSEMODEF2P
         (minus:SSEMODEF2P
          (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
          (mult:SSEMODEF2P
           (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
           (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
         (match_dup 0)
         (const_int 1)))]
  "TARGET_FMA4 && TARGET_FUSED_MADD"
  "vfnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
1869
 
1870
;; Floating point negative multiply and subtract over the SSEMODEF4 modes.
;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c, i.e.
;; operand 0 = ((- operand 1) * operand 2) - operand 3.
;; NOTE(review): <...> substitutions restored by hand; verify against
;; upstream sse.md.
(define_insn "fma4_fnmsub<mode>4"
  [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
        (minus:SSEMODEF4
         (mult:SSEMODEF4
          (neg:SSEMODEF4
           (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x"))
          (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
         (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
  "TARGET_FMA4 && TARGET_FUSED_MADD"
  "vfnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
1884
 
1885
;; Scalar version of fnmsub, expressed on a vector mode.  The vec_merge
;; with mask 1 takes element 0 from ((- operand 1) * operand 2) - operand 3
;; and every other element from operand 0 (match_dup 0), so only the
;; restricted forms below are generated.
;; NOTE(review): <...> substitutions restored by hand; verify against
;; upstream sse.md.
(define_insn "fma4_vmfnmsub<mode>4"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
        (vec_merge:SSEMODEF2P
         (minus:SSEMODEF2P
          (mult:SSEMODEF2P
           (neg:SSEMODEF2P
            (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
           (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
          (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
         (match_dup 0)
         (const_int 1)))]
  "TARGET_FMA4 && TARGET_FUSED_MADD"
  "vfnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
1903
 
1904
;; Intrinsic form of fmadd on FMA4MODEF4 vectors:
;; (operand 1 * operand 2) + operand 3, wrapped in UNSPEC_FMA4_INTRINSIC.
;; Enabled by TARGET_FMA4 alone, without requiring TARGET_FUSED_MADD.
;; NOTE(review): <...> substitutions restored by hand; verify against
;; upstream sse.md.
(define_insn "fma4i_fmadd<mode>4256"
  [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
        (unspec:FMA4MODEF4
         [(plus:FMA4MODEF4
           (mult:FMA4MODEF4
            (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
            (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
           (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
         UNSPEC_FMA4_INTRINSIC))]
  "TARGET_FMA4"
  "vfmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
1917
 
1918
;; Intrinsic form of fmsub on FMA4MODEF4 vectors:
;; (operand 1 * operand 2) - operand 3, wrapped in UNSPEC_FMA4_INTRINSIC.
;; Enabled by TARGET_FMA4 alone, without requiring TARGET_FUSED_MADD.
;; NOTE(review): <...> substitutions restored by hand; verify against
;; upstream sse.md.
(define_insn "fma4i_fmsub<mode>4256"
  [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
        (unspec:FMA4MODEF4
         [(minus:FMA4MODEF4
           (mult:FMA4MODEF4
            (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
            (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
           (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
         UNSPEC_FMA4_INTRINSIC))]
  "TARGET_FMA4"
  "vfmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
1931
 
1932
;; Intrinsic form of fnmadd on FMA4MODEF4 vectors:
;; operand 3 - (operand 1 * operand 2), wrapped in UNSPEC_FMA4_INTRINSIC.
;; Enabled by TARGET_FMA4 alone, without requiring TARGET_FUSED_MADD.
;; NOTE(review): <...> substitutions restored by hand; verify against
;; upstream sse.md.
(define_insn "fma4i_fnmadd<mode>4256"
  [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
        (unspec:FMA4MODEF4
         [(minus:FMA4MODEF4
           (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
           (mult:FMA4MODEF4
            (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
            (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")))]
         UNSPEC_FMA4_INTRINSIC))]
  "TARGET_FMA4"
  "vfnmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
1945
 
1946
;; Intrinsic form of fnmsub on FMA4MODEF4 vectors:
;; ((- operand 1) * operand 2) - operand 3, wrapped in
;; UNSPEC_FMA4_INTRINSIC.  Enabled by TARGET_FMA4 alone, without
;; requiring TARGET_FUSED_MADD.
;; NOTE(review): <...> substitutions restored by hand; verify against
;; upstream sse.md.
(define_insn "fma4i_fnmsub<mode>4256"
  [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
        (unspec:FMA4MODEF4
         [(minus:FMA4MODEF4
           (mult:FMA4MODEF4
            (neg:FMA4MODEF4
             (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x"))
            (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
           (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
         UNSPEC_FMA4_INTRINSIC))]
  "TARGET_FMA4"
  "vfnmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
1960
 
1961
;; Intrinsic form of fmadd on SSEMODEF2P vectors:
;; (operand 1 * operand 2) + operand 3, wrapped in UNSPEC_FMA4_INTRINSIC.
;; Enabled by TARGET_FMA4 alone, without requiring TARGET_FUSED_MADD.
;; NOTE(review): <...> substitutions restored by hand; verify against
;; upstream sse.md.
(define_insn "fma4i_fmadd<mode>4"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
        (unspec:SSEMODEF2P
         [(plus:SSEMODEF2P
           (mult:SSEMODEF2P
            (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
            (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
           (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
         UNSPEC_FMA4_INTRINSIC))]
  "TARGET_FMA4"
  "vfmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
1974
 
1975
;; Intrinsic form of fmsub on SSEMODEF2P vectors:
;; (operand 1 * operand 2) - operand 3, wrapped in UNSPEC_FMA4_INTRINSIC.
;; Enabled by TARGET_FMA4 alone, without requiring TARGET_FUSED_MADD.
;; NOTE(review): <...> substitutions restored by hand; verify against
;; upstream sse.md.
(define_insn "fma4i_fmsub<mode>4"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
        (unspec:SSEMODEF2P
         [(minus:SSEMODEF2P
           (mult:SSEMODEF2P
            (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
            (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
           (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
         UNSPEC_FMA4_INTRINSIC))]
  "TARGET_FMA4"
  "vfmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
1988
 
1989
;; Intrinsic form of fnmadd on SSEMODEF2P vectors:
;; operand 3 - (operand 1 * operand 2), wrapped in UNSPEC_FMA4_INTRINSIC.
;; Enabled by TARGET_FMA4 alone, without requiring TARGET_FUSED_MADD.
;; NOTE(review): <...> substitutions restored by hand; verify against
;; upstream sse.md.
(define_insn "fma4i_fnmadd<mode>4"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
        (unspec:SSEMODEF2P
         [(minus:SSEMODEF2P
           (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
           (mult:SSEMODEF2P
            (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
            (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))]
         UNSPEC_FMA4_INTRINSIC))]
  "TARGET_FMA4"
  "vfnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
2002
 
2003
;; Intrinsic form of fnmsub on SSEMODEF2P vectors:
;; ((- operand 1) * operand 2) - operand 3, wrapped in
;; UNSPEC_FMA4_INTRINSIC.  Enabled by TARGET_FMA4 alone, without
;; requiring TARGET_FUSED_MADD.
;; NOTE(review): <...> substitutions restored by hand; verify against
;; upstream sse.md.
(define_insn "fma4i_fnmsub<mode>4"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
        (unspec:SSEMODEF2P
         [(minus:SSEMODEF2P
           (mult:SSEMODEF2P
            (neg:SSEMODEF2P
             (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
            (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
           (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
         UNSPEC_FMA4_INTRINSIC))]
  "TARGET_FMA4"
  "vfnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
2017
 
2018
;; Intrinsic scalar fmadd, expressed on a vector mode.  The vec_merge
;; with mask 1 takes element 0 from (operand 1 * operand 2) + operand 3
;; and every other element from operand 0 (match_dup 0), so only the
;; restricted forms below are accepted.
;; NOTE(review): <...> substitutions restored by hand; verify against
;; upstream sse.md.
(define_insn "fma4i_vmfmadd<mode>4"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
        (unspec:SSEMODEF2P
         [(vec_merge:SSEMODEF2P
           (plus:SSEMODEF2P
            (mult:SSEMODEF2P
             (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
             (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
            (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
           (match_dup 0)
           (const_int 1))]
         UNSPEC_FMA4_INTRINSIC))]
  "TARGET_FMA4"
  "vfmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
2036
 
2037
;; Intrinsic scalar fmsub, expressed on a vector mode.  The vec_merge
;; with mask 1 takes element 0 from (operand 1 * operand 2) - operand 3
;; and every other element from operand 0 (match_dup 0).
;; NOTE(review): <...> substitutions restored by hand; verify against
;; upstream sse.md.
(define_insn "fma4i_vmfmsub<mode>4"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
        (unspec:SSEMODEF2P
         [(vec_merge:SSEMODEF2P
           (minus:SSEMODEF2P
            (mult:SSEMODEF2P
             (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
             (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
            (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
           (match_dup 0)
           (const_int 1))]
         UNSPEC_FMA4_INTRINSIC))]
  "TARGET_FMA4"
  "vfmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
2053
 
2054
;; Intrinsic scalar fnmadd, expressed on a vector mode.  The vec_merge
;; with mask 1 takes element 0 from operand 3 - (operand 1 * operand 2)
;; and every other element from operand 0 (match_dup 0).
;; NOTE(review): <...> substitutions restored by hand; verify against
;; upstream sse.md.
(define_insn "fma4i_vmfnmadd<mode>4"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
        (unspec:SSEMODEF2P
         [(vec_merge:SSEMODEF2P
           (minus:SSEMODEF2P
            (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
            (mult:SSEMODEF2P
             (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
             (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
           (match_dup 0)
           (const_int 1))]
         UNSPEC_FMA4_INTRINSIC))]
  "TARGET_FMA4"
  "vfnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
2070
 
2071
;; Intrinsic scalar fnmsub, expressed on a vector mode.  The vec_merge
;; with mask 1 takes element 0 from ((- operand 1) * operand 2) - operand 3
;; and every other element from operand 0 (match_dup 0).
;; NOTE(review): <...> substitutions restored by hand; verify against
;; upstream sse.md.
(define_insn "fma4i_vmfnmsub<mode>4"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
        (unspec:SSEMODEF2P
         [(vec_merge:SSEMODEF2P
           (minus:SSEMODEF2P
            (mult:SSEMODEF2P
             (neg:SSEMODEF2P
              (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
             (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
            (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
           (match_dup 0)
           (const_int 1))]
         UNSPEC_FMA4_INTRINSIC))]
  "TARGET_FMA4"
  "vfnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
2088
 
2089
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2090
;;
2091
;; FMA4 Parallel floating point multiply addsub and subadd operations.
2092
;;
2093
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2094
 
2095
;; V8SF multiply with alternating add/subtract.  vec_merge mask 170
;; (0b10101010) takes the odd-numbered elements from (op1 * op2) + op3
;; and the even-numbered elements from (op1 * op2) - op3.
(define_insn "fma4_fmaddsubv8sf4"
  [(set (match_operand:V8SF 0 "register_operand" "=x,x")
        (vec_merge:V8SF
          (plus:V8SF
            (mult:V8SF
              (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
              (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
            (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
          (minus:V8SF
            (mult:V8SF
              (match_dup 1)
              (match_dup 2))
            (match_dup 3))
          (const_int 170)))]
  "TARGET_FMA4 && TARGET_FUSED_MADD"
  "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "V8SF")])
2113
 
2114
;; V4DF multiply with alternating add/subtract.  vec_merge mask 10
;; (0b1010) takes the odd-numbered elements from (op1 * op2) + op3
;; and the even-numbered elements from (op1 * op2) - op3.
(define_insn "fma4_fmaddsubv4df4"
  [(set (match_operand:V4DF 0 "register_operand" "=x,x")
        (vec_merge:V4DF
          (plus:V4DF
            (mult:V4DF
              (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
              (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
            (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
          (minus:V4DF
            (mult:V4DF
              (match_dup 1)
              (match_dup 2))
            (match_dup 3))
          (const_int 10)))]
  "TARGET_FMA4 && TARGET_FUSED_MADD"
  "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "V4DF")])
2132
 
2133
;; V4SF multiply with alternating add/subtract.  vec_merge mask 10
;; (0b1010) takes the odd-numbered elements from (op1 * op2) + op3
;; and the even-numbered elements from (op1 * op2) - op3.
(define_insn "fma4_fmaddsubv4sf4"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (plus:V4SF
            (mult:V4SF
              (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
              (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
            (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
          (minus:V4SF
            (mult:V4SF
              (match_dup 1)
              (match_dup 2))
            (match_dup 3))
          (const_int 10)))]
  "TARGET_FMA4 && TARGET_FUSED_MADD"
  "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "V4SF")])
2151
 
2152
;; V2DF multiply with alternating add/subtract.  vec_merge mask 2
;; (0b10) takes element 1 from (op1 * op2) + op3 and element 0 from
;; (op1 * op2) - op3.
(define_insn "fma4_fmaddsubv2df4"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (vec_merge:V2DF
          (plus:V2DF
            (mult:V2DF
              (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
              (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
            (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
          (minus:V2DF
            (mult:V2DF
              (match_dup 1)
              (match_dup 2))
            (match_dup 3))
          (const_int 2)))]
  "TARGET_FMA4 && TARGET_FUSED_MADD"
  "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "V2DF")])
2170
 
2171
;; V8SF multiply with alternating subtract/add.  vec_merge mask 85
;; (0b01010101) takes the even-numbered elements from (op1 * op2) + op3
;; and the odd-numbered elements from (op1 * op2) - op3.
(define_insn "fma4_fmsubaddv8sf4"
  [(set (match_operand:V8SF 0 "register_operand" "=x,x")
        (vec_merge:V8SF
          (plus:V8SF
            (mult:V8SF
              (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
              (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
            (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
          (minus:V8SF
            (mult:V8SF
              (match_dup 1)
              (match_dup 2))
            (match_dup 3))
          (const_int 85)))]
  "TARGET_FMA4 && TARGET_FUSED_MADD"
  "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "V8SF")])
2189
 
2190
;; V4DF multiply with alternating subtract/add.  vec_merge mask 5
;; (0b0101) takes the even-numbered elements from (op1 * op2) + op3
;; and the odd-numbered elements from (op1 * op2) - op3.
(define_insn "fma4_fmsubaddv4df4"
  [(set (match_operand:V4DF 0 "register_operand" "=x,x")
        (vec_merge:V4DF
          (plus:V4DF
            (mult:V4DF
              (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
              (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
            (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
          (minus:V4DF
            (mult:V4DF
              (match_dup 1)
              (match_dup 2))
            (match_dup 3))
          (const_int 5)))]
  "TARGET_FMA4 && TARGET_FUSED_MADD"
  "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "V4DF")])
2208
 
2209
;; V4SF multiply with alternating subtract/add.  vec_merge mask 5
;; (0b0101) takes the even-numbered elements from (op1 * op2) + op3
;; and the odd-numbered elements from (op1 * op2) - op3.
(define_insn "fma4_fmsubaddv4sf4"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (plus:V4SF
            (mult:V4SF
              (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
              (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
            (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
          (minus:V4SF
            (mult:V4SF
              (match_dup 1)
              (match_dup 2))
            (match_dup 3))
          (const_int 5)))]
  "TARGET_FMA4 && TARGET_FUSED_MADD"
  "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "V4SF")])
2227
 
2228
;; V2DF multiply with alternating subtract/add.  vec_merge mask 1
;; (0b01) takes element 0 from (op1 * op2) + op3 and element 1 from
;; (op1 * op2) - op3.
(define_insn "fma4_fmsubaddv2df4"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (vec_merge:V2DF
          (plus:V2DF
            (mult:V2DF
              (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
              (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
            (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
          (minus:V2DF
            (mult:V2DF
              (match_dup 1)
              (match_dup 2))
            (match_dup 3))
          (const_int 1)))]
  "TARGET_FMA4 && TARGET_FUSED_MADD"
  "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "V2DF")])
2246
 
2247
;; Intrinsic V8SF fmaddsub, wrapped in UNSPEC_FMA4_INTRINSIC and enabled
;; by TARGET_FMA4 alone.  vec_merge mask 170 (0b10101010) takes the
;; odd-numbered elements from (op1 * op2) + op3 and the even-numbered
;; elements from (op1 * op2) - op3.
(define_insn "fma4i_fmaddsubv8sf4"
  [(set (match_operand:V8SF 0 "register_operand" "=x,x")
        (unspec:V8SF
         [(vec_merge:V8SF
           (plus:V8SF
             (mult:V8SF
               (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
               (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
             (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
           (minus:V8SF
             (mult:V8SF
               (match_dup 1)
               (match_dup 2))
             (match_dup 3))
           (const_int 170))]
         UNSPEC_FMA4_INTRINSIC))]
  "TARGET_FMA4"
  "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "V8SF")])
2267
 
2268
;; Intrinsic V4DF fmaddsub, wrapped in UNSPEC_FMA4_INTRINSIC and enabled
;; by TARGET_FMA4 alone.  vec_merge mask 10 (0b1010) takes the
;; odd-numbered elements from (op1 * op2) + op3 and the even-numbered
;; elements from (op1 * op2) - op3.
(define_insn "fma4i_fmaddsubv4df4"
  [(set (match_operand:V4DF 0 "register_operand" "=x,x")
        (unspec:V4DF
         [(vec_merge:V4DF
           (plus:V4DF
             (mult:V4DF
               (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
               (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
             (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
           (minus:V4DF
             (mult:V4DF
               (match_dup 1)
               (match_dup 2))
             (match_dup 3))
           (const_int 10))]
         UNSPEC_FMA4_INTRINSIC))]
  "TARGET_FMA4"
  "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "V4DF")])
2288
 
2289
;; Intrinsic V4SF fmaddsub, wrapped in UNSPEC_FMA4_INTRINSIC and enabled
;; by TARGET_FMA4 alone.  vec_merge mask 10 (0b1010) takes the
;; odd-numbered elements from (op1 * op2) + op3 and the even-numbered
;; elements from (op1 * op2) - op3.
(define_insn "fma4i_fmaddsubv4sf4"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (unspec:V4SF
         [(vec_merge:V4SF
           (plus:V4SF
             (mult:V4SF
               (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
               (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
             (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
           (minus:V4SF
             (mult:V4SF
               (match_dup 1)
               (match_dup 2))
             (match_dup 3))
           (const_int 10))]
         UNSPEC_FMA4_INTRINSIC))]
  "TARGET_FMA4"
  "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "V4SF")])
2309
 
2310
;; Intrinsic V2DF fmaddsub, wrapped in UNSPEC_FMA4_INTRINSIC and enabled
;; by TARGET_FMA4 alone.  vec_merge mask 2 (0b10) takes element 1 from
;; (op1 * op2) + op3 and element 0 from (op1 * op2) - op3.
(define_insn "fma4i_fmaddsubv2df4"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (unspec:V2DF
         [(vec_merge:V2DF
           (plus:V2DF
             (mult:V2DF
               (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
               (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
             (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
           (minus:V2DF
             (mult:V2DF
               (match_dup 1)
               (match_dup 2))
             (match_dup 3))
           (const_int 2))]
         UNSPEC_FMA4_INTRINSIC))]
  "TARGET_FMA4"
  "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "V2DF")])
2330
 
2331
;; Intrinsic V8SF fmsubadd, wrapped in UNSPEC_FMA4_INTRINSIC and enabled
;; by TARGET_FMA4 alone.  vec_merge mask 85 (0b01010101) takes the
;; even-numbered elements from (op1 * op2) + op3 and the odd-numbered
;; elements from (op1 * op2) - op3.
(define_insn "fma4i_fmsubaddv8sf4"
  [(set (match_operand:V8SF 0 "register_operand" "=x,x")
        (unspec:V8SF
         [(vec_merge:V8SF
           (plus:V8SF
             (mult:V8SF
               (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
               (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
             (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
           (minus:V8SF
             (mult:V8SF
               (match_dup 1)
               (match_dup 2))
             (match_dup 3))
           (const_int 85))]
         UNSPEC_FMA4_INTRINSIC))]
  "TARGET_FMA4"
  "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "V8SF")])
2351
 
2352
;; FMA4 intrinsic vfmsubaddpd on V4DF.  vec_merge mask 5 (0b0101) takes
;; elements 0 and 2 from the "+ op3" arm and elements 1 and 3 from the
;; "- op3" arm of op1 * op2.
(define_insn "fma4i_fmsubaddv4df4"
  [(set (match_operand:V4DF 0 "register_operand" "=x,x")
        (unspec:V4DF
          [(vec_merge:V4DF
             (plus:V4DF
               (mult:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
                          (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
               (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
             (minus:V4DF
               (mult:V4DF (match_dup 1) (match_dup 2))
               (match_dup 3))
             (const_int 5))]
          UNSPEC_FMA4_INTRINSIC))]
  "TARGET_FMA4"
  "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "V4DF")])
2372
 
2373
;; FMA4 intrinsic vfmsubaddps on V4SF.  vec_merge mask 5 (0b0101) takes
;; elements 0 and 2 from the "+ op3" arm and elements 1 and 3 from the
;; "- op3" arm of op1 * op2.
(define_insn "fma4i_fmsubaddv4sf4"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (unspec:V4SF
          [(vec_merge:V4SF
             (plus:V4SF
               (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
                          (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
               (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
             (minus:V4SF
               (mult:V4SF (match_dup 1) (match_dup 2))
               (match_dup 3))
             (const_int 5))]
          UNSPEC_FMA4_INTRINSIC))]
  "TARGET_FMA4"
  "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "V4SF")])
2393
 
2394
;; FMA4 intrinsic vfmsubaddpd on V2DF.  vec_merge mask 1 takes element 0
;; from the "+ op3" arm and element 1 from the "- op3" arm of op1 * op2.
(define_insn "fma4i_fmsubaddv2df4"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (unspec:V2DF
          [(vec_merge:V2DF
             (plus:V2DF
               (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
                          (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
               (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
             (minus:V2DF
               (mult:V2DF (match_dup 1) (match_dup 2))
               (match_dup 3))
             (const_int 1))]
          UNSPEC_FMA4_INTRINSIC))]
  "TARGET_FMA4"
  "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "V2DF")])
2414
 
2415
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2416
;;
2417
;; Parallel single-precision floating point conversion operations
2418
;;
2419
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2420
 
2421
;; cvtpi2ps: convert the two SImode elements of operand 2 ("y" register
;; or memory) to SFmode and merge them into elements 0 and 1 of the
;; result (mask 3).  The remaining elements come from operand 1, which
;; is tied to the destination.
(define_insn "sse_cvtpi2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:V2SF
              (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
          (match_operand:V4SF 1 "register_operand" "0")
          (const_int 3)))]
  "TARGET_SSE"
  "cvtpi2ps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")])
2432
 
2433
;; cvtps2pi: UNSPEC_FIX_NOTRUNC conversion of operand 1 (V4SF), of which
;; elements 0 and 1 are selected into a V2SI "y" register.
(define_insn "sse_cvtps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_select:V2SI
          (unspec:V4SI
            [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
            UNSPEC_FIX_NOTRUNC)
          (parallel [(const_int 0)
                     (const_int 1)])))]
  "TARGET_SSE"
  "cvtps2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "mode" "DI")])
2444
 
2445
;; cvttps2pi: `fix' conversion of operand 1 (V4SF) to integers, of which
;; elements 0 and 1 are selected into a V2SI "y" register.
(define_insn "sse_cvttps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_select:V2SI
          (fix:V4SI
            (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0)
                     (const_int 1)])))]
  "TARGET_SSE"
  "cvttps2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "prefix_rep" "0")
   (set_attr "mode" "SF")])
2456
 
2457
;; AVX three-operand vcvtsi2ss: SImode operand 2 is converted to SFmode
;; and merged into element 0 (mask 1); all other elements are taken from
;; operand 1.
(define_insn "*avx_cvtsi2ss"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:SF
              (match_operand:SI 2 "nonimmediate_operand" "rm")))
          (match_operand:V4SF 1 "register_operand" "x")
          (const_int 1)))]
  "TARGET_AVX"
  "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "SF")])
2469
 
2470
;; cvtsi2ss: SImode operand 2 (register or memory alternative) is
;; converted to SFmode and merged into element 0 (mask 1) of operand 1,
;; which is tied to the destination.
(define_insn "sse_cvtsi2ss"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:SF
              (match_operand:SI 2 "nonimmediate_operand" "r,m")))
          (match_operand:V4SF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE"
  "cvtsi2ss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "vector,double")
   (set_attr "amdfam10_decode" "vector,double")
   (set_attr "mode" "SF")])
2483
 
2484
;; AVX three-operand vcvtsi2ssq (64-bit targets only): DImode operand 2
;; is converted to SFmode and merged into element 0 (mask 1); all other
;; elements are taken from operand 1.
(define_insn "*avx_cvtsi2ssq"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:SF
              (match_operand:DI 2 "nonimmediate_operand" "rm")))
          (match_operand:V4SF 1 "register_operand" "x")
          (const_int 1)))]
  "TARGET_AVX && TARGET_64BIT"
  "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "length_vex" "4")
   (set_attr "prefix" "vex")
   (set_attr "mode" "SF")])
2497
 
2498
;; cvtsi2ssq (64-bit targets only): DImode operand 2 is converted to
;; SFmode and merged into element 0 (mask 1) of operand 1, which is tied
;; to the destination.
(define_insn "sse_cvtsi2ssq"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:SF
              (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
          (match_operand:V4SF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE && TARGET_64BIT"
  "cvtsi2ssq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix_rex" "1")
   (set_attr "athlon_decode" "vector,double")
   (set_attr "amdfam10_decode" "vector,double")
   (set_attr "mode" "SF")])
2512
 
2513
;; [v]cvtss2si: UNSPEC_FIX_NOTRUNC conversion of element 0 of the V4SF
;; operand 1 to an SImode general register.
(define_insn "sse_cvtss2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (unspec:SI
          [(vec_select:SF
             (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE"
  "%vcvtss2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])
2527
 
2528
;; [v]cvtss2si on a scalar: UNSPEC_FIX_NOTRUNC conversion of the SFmode
;; operand 1 to an SImode general register.
(define_insn "sse_cvtss2si_2"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (unspec:SI
          [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE"
  "%vcvtss2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])
2540
 
2541
;; [v]cvtss2si with a DImode destination (64-bit targets only):
;; UNSPEC_FIX_NOTRUNC conversion of element 0 of the V4SF operand 1.
;; The "{q}" in the template emits the size suffix in AT&T syntax only.
(define_insn "sse_cvtss2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (unspec:DI
          [(vec_select:SF
             (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE && TARGET_64BIT"
  "%vcvtss2si{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DI")])
2555
 
2556
;; [v]cvtss2si on a scalar with a DImode destination (64-bit targets
;; only): UNSPEC_FIX_NOTRUNC conversion of the SFmode operand 1.
;; The "{q}" in the template emits the size suffix in AT&T syntax only.
(define_insn "sse_cvtss2siq_2"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (unspec:DI
          [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE && TARGET_64BIT"
  "%vcvtss2si{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DI")])
2568
 
2569
;; [v]cvttss2si: `fix' conversion of element 0 of the V4SF operand 1 to
;; an SImode general register.
(define_insn "sse_cvttss2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (fix:SI
          (vec_select:SF
            (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE"
  "%vcvttss2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])
2583
 
2584
;; [v]cvttss2si with a DImode destination (64-bit targets only): `fix'
;; conversion of element 0 of the V4SF operand 1.
;; The "{q}" in the template emits the size suffix in AT&T syntax only.
(define_insn "sse_cvttss2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (fix:DI
          (vec_select:SF
            (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE && TARGET_64BIT"
  "%vcvttss2si{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DI")])
2598
 
2599
;; AVX vcvtdq2ps: `float' conversion of an integer vector to the
;; floating-point vector mode selected by the AVXMODEDCVTDQ2PS iterator.
;; NOTE(review): this copy had lost its angle-bracket mode attributes,
;; leaving an empty "match_operand:" mode, an empty "mode" attribute and
;; a pattern name with no per-mode suffix.  <avxmodesuffix>,
;; <avxcvtvecmode> and <avxvecmode> are restored as in upstream GCC 4.5
;; sse.md -- confirm against the define_mode_attr list in this file.
(define_insn "avx_cvtdq2ps<avxmodesuffix>"
  [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
        (float:AVXMODEDCVTDQ2PS
          (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vcvtdq2ps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<avxvecmode>")])
2608
 
2609
;; cvtdq2ps: `float' conversion of the V4SI operand 1 to V4SF.
(define_insn "sse2_cvtdq2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (float:V4SF
          (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "cvtdq2ps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")])
2616
 
2617
;; Unsigned V4SI -> V4SF conversion built from the signed one:
;;   op5 = (float) op1            signed conversion
;;   op6 = op5 < op3              op3 is the zero vector
;;   op7 = op6 & op4              op4 is built from 2^32
;;   op0 = op5 + op7
(define_expand "sse2_cvtudq2ps"
  [(set (match_dup 5)
        (float:V4SF
          (match_operand:V4SI 1 "nonimmediate_operand" "")))
   (set (match_dup 6)
        (lt:V4SF (match_dup 5)
                 (match_dup 3)))
   (set (match_dup 7)
        (and:V4SF (match_dup 6)
                  (match_dup 4)))
   (set (match_operand:V4SF 0 "register_operand" "")
        (plus:V4SF (match_dup 5)
                   (match_dup 7)))]
  "TARGET_SSE2"
{
  REAL_VALUE_TYPE two_pow_32;
  rtx two32_elt;

  /* 1.0 scaled by 2^32, as an SFmode constant.  */
  real_ldexp (&two_pow_32, &dconst1, 32);
  two32_elt = const_double_from_real_value (two_pow_32, SFmode);

  operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
  /* Presumably the constant replicated into every element -- confirm
     against ix86_build_const_vector.  */
  operands[4] = force_reg (V4SFmode,
                           ix86_build_const_vector (SFmode, 1, two32_elt));

  /* Fresh temporaries for the three intermediate results.  */
  operands[5] = gen_reg_rtx (V4SFmode);
  operands[6] = gen_reg_rtx (V4SFmode);
  operands[7] = gen_reg_rtx (V4SFmode);
})
2641
 
2642
;; AVX vcvtps2dq: UNSPEC_FIX_NOTRUNC conversion of a floating-point
;; vector to the integer vector mode selected by the AVXMODEDCVTPS2DQ
;; iterator.
;; NOTE(review): this copy had lost its angle-bracket mode attributes,
;; leaving an empty "match_operand:" mode, an empty "mode" attribute and
;; a pattern name with no per-mode suffix.  <avxmodesuffix>,
;; <avxcvtvecmode> and <avxvecmode> are restored as in upstream GCC 4.5
;; sse.md -- confirm against the define_mode_attr list in this file.
(define_insn "avx_cvtps2dq<avxmodesuffix>"
  [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
        (unspec:AVXMODEDCVTPS2DQ
          [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_AVX"
  "vcvtps2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<avxvecmode>")])
2652
 
2653
;; cvtps2dq: UNSPEC_FIX_NOTRUNC conversion of the V4SF operand 1 to V4SI.
(define_insn "sse2_cvtps2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (unspec:V4SI
          [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "cvtps2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
2662
 
2663
;; AVX vcvttps2dq: `fix' conversion of a floating-point vector to the
;; integer vector mode selected by the AVXMODEDCVTPS2DQ iterator.
;; NOTE(review): this copy had lost its angle-bracket mode attributes,
;; leaving an empty "match_operand:" mode, an empty "mode" attribute and
;; a pattern name with no per-mode suffix.  <avxmodesuffix>,
;; <avxcvtvecmode> and <avxvecmode> are restored as in upstream GCC 4.5
;; sse.md -- confirm against the define_mode_attr list in this file.
(define_insn "avx_cvttps2dq<avxmodesuffix>"
  [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
        (fix:AVXMODEDCVTPS2DQ
          (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vcvttps2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<avxvecmode>")])
2672
 
2673
;; cvttps2dq: `fix' conversion of the V4SF operand 1 to V4SI.
(define_insn "sse2_cvttps2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (fix:V4SI
          (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "cvttps2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix_data16" "0")
   (set_attr "mode" "TI")])
2682
 
2683
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2684
;;
2685
;; Parallel double-precision floating point conversion operations
2686
;;
2687
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2688
 
2689
;; cvtpi2pd: `float' conversion of the V2SI operand 1 to V2DF.  The first
;; alternative takes a "y" register, the second a memory operand; the
;; "unit" and "prefix_data16" attributes differ between the two.
(define_insn "sse2_cvtpi2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (float:V2DF
          (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
  "TARGET_SSE2"
  "cvtpi2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx,*")
   (set_attr "prefix_data16" "1,*")
   (set_attr "mode" "V2DF")])
2698
 
2699
;; cvtpd2pi: UNSPEC_FIX_NOTRUNC conversion of the V2DF operand 1 into a
;; V2SI "y" register.
(define_insn "sse2_cvtpd2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (unspec:V2SI
          [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "cvtpd2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "DI")])
2709
 
2710
;; cvttpd2pi: `fix' conversion of the V2DF operand 1 into a V2SI "y"
;; register.
(define_insn "sse2_cvttpd2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (fix:V2SI
          (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "cvttpd2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
2719
 
2720
;; AVX three-operand vcvtsi2sd: SImode operand 2 is converted to DFmode
;; and merged into element 0 (mask 1); the other element is taken from
;; operand 1.
(define_insn "*avx_cvtsi2sd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (vec_duplicate:V2DF
            (float:DF
              (match_operand:SI 2 "nonimmediate_operand" "rm")))
          (match_operand:V2DF 1 "register_operand" "x")
          (const_int 1)))]
  "TARGET_AVX"
  "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "DF")])
2732
 
2733
;; cvtsi2sd: SImode operand 2 (register or memory alternative) is
;; converted to DFmode and merged into element 0 (mask 1) of operand 1,
;; which is tied to the destination.
(define_insn "sse2_cvtsi2sd"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (vec_merge:V2DF
          (vec_duplicate:V2DF
            (float:DF
              (match_operand:SI 2 "nonimmediate_operand" "r,m")))
          (match_operand:V2DF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE2"
  "cvtsi2sd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "mode" "DF")
   (set_attr "athlon_decode" "double,direct")
   (set_attr "amdfam10_decode" "vector,double")])
2746
 
2747
;; AVX three-operand vcvtsi2sdq (64-bit targets only): DImode operand 2
;; is converted to DFmode and merged into element 0 (mask 1); the other
;; element is taken from operand 1.
(define_insn "*avx_cvtsi2sdq"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (vec_duplicate:V2DF
            (float:DF
              (match_operand:DI 2 "nonimmediate_operand" "rm")))
          (match_operand:V2DF 1 "register_operand" "x")
          (const_int 1)))]
  "TARGET_AVX && TARGET_64BIT"
  "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "length_vex" "4")
   (set_attr "prefix" "vex")
   (set_attr "mode" "DF")])
2760
 
2761
;; cvtsi2sdq (64-bit targets only): DImode operand 2 is converted to
;; DFmode and merged into element 0 (mask 1) of operand 1, which is tied
;; to the destination.
(define_insn "sse2_cvtsi2sdq"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (vec_merge:V2DF
          (vec_duplicate:V2DF
            (float:DF
              (match_operand:DI 2 "nonimmediate_operand" "r,m")))
          (match_operand:V2DF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE2 && TARGET_64BIT"
  "cvtsi2sdq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix_rex" "1")
   (set_attr "mode" "DF")
   (set_attr "athlon_decode" "double,direct")
   (set_attr "amdfam10_decode" "vector,double")])
2775
 
2776
;; [v]cvtsd2si: UNSPEC_FIX_NOTRUNC conversion of element 0 of the V2DF
;; operand 1 to an SImode general register.
(define_insn "sse2_cvtsd2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (unspec:SI
          [(vec_select:DF
             (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "%vcvtsd2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])
2790
 
2791
;; [v]cvtsd2si on a scalar: UNSPEC_FIX_NOTRUNC conversion of the DFmode
;; operand 1 to an SImode general register.
(define_insn "sse2_cvtsd2si_2"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (unspec:SI
          [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "%vcvtsd2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])
2803
 
2804
;; [v]cvtsd2si with a DImode destination (64-bit targets only):
;; UNSPEC_FIX_NOTRUNC conversion of element 0 of the V2DF operand 1.
;; The size suffix is written as "{q}" so that it is emitted for AT&T
;; syntax only, matching sse_cvtss2siq; the previous hard-coded "q"
;; also leaked into -masm=intel output.  AT&T output is unchanged.
(define_insn "sse2_cvtsd2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (unspec:DI
          [(vec_select:DF
             (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2 && TARGET_64BIT"
  "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DI")])
2818
 
2819
;; [v]cvtsd2si on a scalar with a DImode destination (64-bit targets
;; only): UNSPEC_FIX_NOTRUNC conversion of the DFmode operand 1.
;; The size suffix is written as "{q}" so that it is emitted for AT&T
;; syntax only, matching sse_cvtss2siq_2; the previous hard-coded "q"
;; also leaked into -masm=intel output.  AT&T output is unchanged.
(define_insn "sse2_cvtsd2siq_2"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (unspec:DI
          [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2 && TARGET_64BIT"
  "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DI")])
2831
 
2832
;; [v]cvttsd2si: `fix' conversion of element 0 of the V2DF operand 1 to
;; an SImode general register.
(define_insn "sse2_cvttsd2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (fix:SI
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE2"
  "%vcvttsd2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")])
2846
 
2847
;; [v]cvttsd2si with a DImode destination (64-bit targets only): `fix'
;; conversion of element 0 of the V2DF operand 1.
;; The size suffix is written as "{q}" so that it is emitted for AT&T
;; syntax only, matching sse_cvttss2siq; the previous hard-coded "q"
;; also leaked into -masm=intel output.  AT&T output is unchanged.
(define_insn "sse2_cvttsd2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (fix:DI
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE2 && TARGET_64BIT"
  "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DI")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")])
2861
 
2862
;; AVX vcvtdq2pd, 256-bit result: `float' conversion of the V4SI
;; operand 1 to V4DF.
(define_insn "avx_cvtdq2pd256"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (float:V4DF
          (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vcvtdq2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])
2870
 
2871
;; [v]cvtdq2pd: `float' conversion of elements 0 and 1 of the V4SI
;; operand 1 to V2DF.
(define_insn "sse2_cvtdq2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (float:V2DF
          (vec_select:V2SI
            (match_operand:V4SI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)
                       (const_int 1)]))))]
  "TARGET_SSE2"
  "%vcvtdq2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V2DF")])
2882
 
2883
;; AVX vcvtpd2dq, 256-bit source: UNSPEC_FIX_NOTRUNC conversion of the
;; V4DF operand 1 to V4SI.  The "{y}" suffix is emitted for AT&T syntax
;; only.
(define_insn "avx_cvtpd2dq256"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (unspec:V4SI
          [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_AVX"
  "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
2892
 
2893
;; Expander for cvtpd2dq: the UNSPEC_FIX_NOTRUNC conversion of the V2DF
;; operand 1 forms the first V2SI half of the V4SI result; the second
;; half is operand 2, set below to the V2SI zero constant.
(define_expand "sse2_cvtpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (vec_concat:V4SI
          (unspec:V2SI
            [(match_operand:V2DF 1 "nonimmediate_operand" "")]
            UNSPEC_FIX_NOTRUNC)
          (match_dup 2)))]
  "TARGET_SSE2"
{
  operands[2] = CONST0_RTX (V2SImode);
})
2901
 
2902
;; Matches the RTL generated by the sse2_cvtpd2dq expander: operand 2
;; must be the V2SI zero constant.  The VEX form with an AT&T-only "{x}"
;; suffix is emitted under TARGET_AVX, the legacy mnemonic otherwise.
(define_insn "*sse2_cvtpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          (unspec:V2SI
            [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
            UNSPEC_FIX_NOTRUNC)
          (match_operand:V2SI 2 "const0_operand" "")))]
  "TARGET_SSE2"
{
  return TARGET_AVX ? "vcvtpd2dq{x}\t{%1, %0|%0, %1}"
                    : "cvtpd2dq\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")
   (set_attr "amdfam10_decode" "double")])
2917
 
2918
;; AVX vcvttpd2dq, 256-bit source: `fix' conversion of the V4DF
;; operand 1 to V4SI.  The "{y}" suffix is emitted for AT&T syntax only.
(define_insn "avx_cvttpd2dq256"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (fix:V4SI
          (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
2926
 
2927
;; Expander for cvttpd2dq: the `fix' conversion of the V2DF operand 1
;; forms the first V2SI half of the V4SI result; the second half is
;; operand 2, set below to the V2SI zero constant.
(define_expand "sse2_cvttpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (vec_concat:V4SI
          (fix:V2SI
            (match_operand:V2DF 1 "nonimmediate_operand" ""))
          (match_dup 2)))]
  "TARGET_SSE2"
{
  operands[2] = CONST0_RTX (V2SImode);
})
2934
 
2935
;; Matches the RTL generated by the sse2_cvttpd2dq expander: operand 2
;; must be the V2SI zero constant.  The VEX form with an AT&T-only "{x}"
;; suffix is emitted under TARGET_AVX, the legacy mnemonic otherwise.
(define_insn "*sse2_cvttpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          (fix:V2SI
            (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
          (match_operand:V2SI 2 "const0_operand" "")))]
  "TARGET_SSE2"
{
  return TARGET_AVX ? "vcvttpd2dq{x}\t{%1, %0|%0, %1}"
                    : "cvttpd2dq\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")
   (set_attr "amdfam10_decode" "double")])
2947
 
2948
;; AVX three-operand vcvtsd2ss: the float_truncate of operand 2 is
;; merged into element 0 (mask 1); all other elements are taken from
;; operand 1.
(define_insn "*avx_cvtsd2ss"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float_truncate:V2SF
              (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
          (match_operand:V4SF 1 "register_operand" "x")
          (const_int 1)))]
  "TARGET_AVX"
  "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "SF")])
2961
 
2962
;; cvtsd2ss: the float_truncate of operand 2 is merged into element 0
;; (mask 1) of operand 1, which is tied to the destination.
(define_insn "sse2_cvtsd2ss"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float_truncate:V2SF
              (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
          (match_operand:V4SF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE2"
  "cvtsd2ss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "athlon_decode" "vector,double")
   (set_attr "amdfam10_decode" "vector,double")
   (set_attr "mode" "SF")])
2976
 
2977
;; AVX three-operand vcvtss2sd: the float_extend of elements 0 and 1 of
;; operand 2 is merged with operand 1 under mask 1, so only element 0
;; of the result comes from the conversion.
(define_insn "*avx_cvtss2sd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (float_extend:V2DF
            (vec_select:V2SF
              (match_operand:V4SF 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0)
                         (const_int 1)])))
          (match_operand:V2DF 1 "register_operand" "x")
          (const_int 1)))]
  "TARGET_AVX"
  "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "DF")])
2991
 
2992
;; cvtss2sd: the float_extend of elements 0 and 1 of operand 2 is merged
;; with operand 1 (tied to the destination) under mask 1, so only
;; element 0 of the result comes from the conversion.
(define_insn "sse2_cvtss2sd"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (vec_merge:V2DF
          (float_extend:V2DF
            (vec_select:V2SF
              (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
              (parallel [(const_int 0)
                         (const_int 1)])))
          (match_operand:V2DF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE2"
  "cvtss2sd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "amdfam10_decode" "vector,double")
   (set_attr "mode" "DF")])
3006
 
3007
;; AVX vcvtpd2ps, 256-bit source: float_truncate of the V4DF operand 1
;; to V4SF.  The "{y}" suffix is emitted for AT&T syntax only.
(define_insn "avx_cvtpd2ps256"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (float_truncate:V4SF
          (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF")])
3016
 
3017
;; Expander for cvtpd2ps: the float_truncate of the V2DF operand 1 forms
;; the first V2SF half of the V4SF result; the second half is operand 2,
;; set below to the V2SF zero constant.
(define_expand "sse2_cvtpd2ps"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (vec_concat:V4SF
          (float_truncate:V2SF
            (match_operand:V2DF 1 "nonimmediate_operand" ""))
          (match_dup 2)))]
  "TARGET_SSE2"
{
  operands[2] = CONST0_RTX (V2SFmode);
})
3025
 
3026
;; Matches the RTL generated by the sse2_cvtpd2ps expander: operand 2
;; must be the V2SF zero constant.  The VEX form with an AT&T-only "{x}"
;; suffix is emitted under TARGET_AVX, the legacy mnemonic otherwise.
(define_insn "*sse2_cvtpd2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_concat:V4SF
          (float_truncate:V2SF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
          (match_operand:V2SF 2 "const0_operand" "")))]
  "TARGET_SSE2"
{
  return TARGET_AVX ? "vcvtpd2ps{x}\t{%1, %0|%0, %1}"
                    : "cvtpd2ps\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V4SF")
   (set_attr "amdfam10_decode" "double")])
3040
 
3041
;; AVX vcvtps2pd, 256-bit result: float_extend of the V4SF operand 1 to
;; V4DF.
(define_insn "avx_cvtps2pd256"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (float_extend:V4DF
          (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vcvtps2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])
3050
 
3051
;; [v]cvtps2pd: float_extend of elements 0 and 1 of the V4SF operand 1
;; to V2DF.
(define_insn "sse2_cvtps2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (float_extend:V2DF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)
                       (const_int 1)]))))]
  "TARGET_SSE2"
  "%vcvtps2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V2DF")
   (set_attr "prefix_data16" "0")
   (set_attr "amdfam10_decode" "direct")])
3064
 
3065
;; Extend the high two SFmode elements of operand 1 to V2DF.
;; Step 1 selects elements 6, 7, 2, 3 of the concatenation (tmp, op1)
;; into the V4SF temporary operand 2, so elements 2 and 3 of operand 1
;; land in elements 0 and 1 of the temporary.  Step 2 float_extends
;; elements 0 and 1 of the temporary.
(define_expand "vec_unpacks_hi_v4sf"
  [(set (match_dup 2)
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_dup 2)
            (match_operand:V4SF 1 "nonimmediate_operand" ""))
          (parallel [(const_int 6) (const_int 7)
                     (const_int 2) (const_int 3)])))
   (set (match_operand:V2DF 0 "register_operand" "")
        (float_extend:V2DF
          (vec_select:V2SF
            (match_dup 2)
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "operands[2] = gen_reg_rtx (V4SFmode);")
3084
 
3085
;; Extend the low two SFmode elements (0 and 1) of operand 1 to V2DF.
(define_expand "vec_unpacks_lo_v4sf"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (float_extend:V2DF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "")
            (parallel [(const_int 0)
                       (const_int 1)]))))]
  "TARGET_SSE2")
3092
 
3093
;; V8HI -> V4SF in two steps: vec_unpacks_hi_v8hi into a V4SI temporary,
;; then cvtdq2ps on that temporary.
(define_expand "vec_unpacks_float_hi_v8hi"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (V4SImode);

  emit_insn (gen_vec_unpacks_hi_v8hi (widened, operands[1]));
  emit_insn (gen_sse2_cvtdq2ps (operands[0], widened));
  DONE;
})
3104
 
3105
;; V8HI -> V4SF in two steps: vec_unpacks_lo_v8hi into a V4SI temporary,
;; then cvtdq2ps on that temporary.
(define_expand "vec_unpacks_float_lo_v8hi"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (V4SImode);

  emit_insn (gen_vec_unpacks_lo_v8hi (widened, operands[1]));
  emit_insn (gen_sse2_cvtdq2ps (operands[0], widened));
  DONE;
})
3116
 
3117
;; V8HI -> V4SF in two steps: vec_unpacku_hi_v8hi into a V4SI temporary,
;; then cvtdq2ps on that temporary.
(define_expand "vec_unpacku_float_hi_v8hi"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (V4SImode);

  emit_insn (gen_vec_unpacku_hi_v8hi (widened, operands[1]));
  emit_insn (gen_sse2_cvtdq2ps (operands[0], widened));
  DONE;
})
3128
 
3129
;; V8HI -> V4SF in two steps: vec_unpacku_lo_v8hi into a V4SI temporary,
;; then cvtdq2ps on that temporary.
(define_expand "vec_unpacku_float_lo_v8hi"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (V4SImode);

  emit_insn (gen_vec_unpacku_lo_v8hi (widened, operands[1]));
  emit_insn (gen_sse2_cvtdq2ps (operands[0], widened));
  DONE;
})
3140
 
3141
;; Convert the high two SImode elements of operand 1 to V2DF.
;; Step 1 selects elements 2, 3, 2, 3 of operand 1 into the V4SI
;; temporary operand 2; step 2 applies `float' to elements 0 and 1 of
;; the temporary.
(define_expand "vec_unpacks_float_hi_v4si"
  [(set (match_dup 2)
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand" "")
          (parallel [(const_int 2) (const_int 3)
                     (const_int 2) (const_int 3)])))
   (set (match_operand:V2DF 0 "register_operand" "")
        (float:V2DF
          (vec_select:V2SI
            (match_dup 2)
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
{
  operands[2] = gen_reg_rtx (V4SImode);
})
3156
 
3157
;; Convert the low two SImode elements (0 and 1) of operand 1 to V2DF
;; with `float'.
(define_expand "vec_unpacks_float_lo_v4si"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (float:V2DF
          (vec_select:V2SI
            (match_operand:V4SI 1 "nonimmediate_operand" "")
            (parallel [(const_int 0)
                       (const_int 1)]))))]
  "TARGET_SSE2")
3164
 
3165
;; Unsigned conversion of the high two SImode elements of operand 1 to
;; V2DF, built from the signed conversion:
;;   op5 = elements 2, 3, 2, 3 of op1
;;   op6 = (float) elements 0 and 1 of op5
;;   op7 = op6 < op3              op3 is the zero vector
;;   op8 = op7 & op4              op4 is built from 2^32
;;   op0 = op6 + op8
(define_expand "vec_unpacku_float_hi_v4si"
  [(set (match_dup 5)
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand" "")
          (parallel [(const_int 2) (const_int 3)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 6)
        (float:V2DF
          (vec_select:V2SI
            (match_dup 5)
            (parallel [(const_int 0) (const_int 1)]))))
   (set (match_dup 7)
        (lt:V2DF (match_dup 6)
                 (match_dup 3)))
   (set (match_dup 8)
        (and:V2DF (match_dup 7)
                  (match_dup 4)))
   (set (match_operand:V2DF 0 "register_operand" "")
        (plus:V2DF (match_dup 6)
                   (match_dup 8)))]
  "TARGET_SSE2"
{
  REAL_VALUE_TYPE two_pow_32;
  rtx two32_elt;

  /* 1.0 scaled by 2^32, as a DFmode constant.  */
  real_ldexp (&two_pow_32, &dconst1, 32);
  two32_elt = const_double_from_real_value (two_pow_32, DFmode);

  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
  /* Presumably the constant replicated into every element -- confirm
     against ix86_build_const_vector.  */
  operands[4] = force_reg (V2DFmode,
                           ix86_build_const_vector (DFmode, 1, two32_elt));

  /* Temporary for the shuffled integer vector.  */
  operands[5] = gen_reg_rtx (V4SImode);

  /* Temporaries for the three V2DF intermediate results.  */
  operands[6] = gen_reg_rtx (V2DFmode);
  operands[7] = gen_reg_rtx (V2DFmode);
  operands[8] = gen_reg_rtx (V2DFmode);
})
3201
 
3202
;; Expander: convert elements 0 and 1 of the V4SI operand 1, treated as
;; unsigned, to V2DF.  The sequence is:
;;   op5 = (signed) float of elements {0,1} of op1
;;   op6 = op5 < op3          (op3 is a zero vector)
;;   op7 = op6 & op4          (op4 holds 2**32)
;;   op0 = op5 + op7
;; i.e. 2**32 is added back to lanes whose signed conversion came out
;; negative.  NOTE(review): this relies on the lt pattern producing an
;; all-ones/all-zeros lane mask -- confirm against the compare patterns.
(define_expand "vec_unpacku_float_lo_v4si"
3203
  [(set (match_dup 5)
3204
        (float:V2DF
3205
          (vec_select:V2SI
3206
            (match_operand:V4SI 1 "nonimmediate_operand" "")
3207
            (parallel [(const_int 0) (const_int 1)]))))
3208
   (set (match_dup 6)
3209
        (lt:V2DF (match_dup 5) (match_dup 3)))
3210
   (set (match_dup 7)
3211
        (and:V2DF (match_dup 6) (match_dup 4)))
3212
   (set (match_operand:V2DF 0 "register_operand" "")
3213
        (plus:V2DF (match_dup 5) (match_dup 7)))]
3214
  "TARGET_SSE2"
3215
{
3216
  REAL_VALUE_TYPE TWO32r;
3217
  rtx x;
3218
  int i;
3219
 
3220
  /* x = 1.0 * 2**32 as a DFmode constant.  */
  real_ldexp (&TWO32r, &dconst1, 32);
3221
  x = const_double_from_real_value (TWO32r, DFmode);
3222
 
3223
  /* operands[3]: zero vector; operands[4]: vector built from x
     (presumably x replicated in every element -- see
     ix86_build_const_vector).  */
  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
3224
  operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));
3225
 
3226
  /* operands[5..7]: V2DF temporaries for the intermediate results.  */
  for (i = 5; i < 8; i++)
3227
    operands[i] = gen_reg_rtx (V2DFmode);
3228
})
3229
 
3230
;; Expander: narrow two V2DF operands into one V4SF result.  Each input
;; is converted with cvtpd2ps into its own V4SF temporary, and the two
;; temporaries are then combined into operand 0 with movlhps.
(define_expand "vec_pack_trunc_v2df"
3231
  [(match_operand:V4SF 0 "register_operand" "")
3232
   (match_operand:V2DF 1 "nonimmediate_operand" "")
3233
   (match_operand:V2DF 2 "nonimmediate_operand" "")]
3234
  "TARGET_SSE2"
3235
{
3236
  rtx r1, r2;
3237
 
3238
  r1 = gen_reg_rtx (V4SFmode);
3239
  r2 = gen_reg_rtx (V4SFmode);
3240
 
3241
  emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3242
  emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
3243
  emit_insn (gen_sse_movlhps (operands[0], r1, r2));
3244
  DONE;
3245
})
3246
 
3247
;; Expander: convert two V2DF operands to integers with cvttpd2dq and
;; pack the results into one V4SI.  Each conversion lands in its own
;; V4SI temporary; the temporaries are then merged by interleaving
;; their low V2DI elements (all three values viewed as V2DI through
;; gen_lowpart).
(define_expand "vec_pack_sfix_trunc_v2df"
3248
  [(match_operand:V4SI 0 "register_operand" "")
3249
   (match_operand:V2DF 1 "nonimmediate_operand" "")
3250
   (match_operand:V2DF 2 "nonimmediate_operand" "")]
3251
  "TARGET_SSE2"
3252
{
3253
  rtx r1, r2;
3254
 
3255
  r1 = gen_reg_rtx (V4SImode);
3256
  r2 = gen_reg_rtx (V4SImode);
3257
 
3258
  emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3259
  emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3260
  emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3261
                                         gen_lowpart (V2DImode, r1),
3262
                                         gen_lowpart (V2DImode, r2)));
3263
  DONE;
3264
})
3265
 
3266
;; Expander: same shape as vec_pack_sfix_trunc_v2df, but the two V2DF
;; inputs are converted with cvtpd2dq instead of cvttpd2dq.  The two
;; V4SI temporaries are merged by interleaving their low V2DI elements.
(define_expand "vec_pack_sfix_v2df"
3267
  [(match_operand:V4SI 0 "register_operand" "")
3268
   (match_operand:V2DF 1 "nonimmediate_operand" "")
3269
   (match_operand:V2DF 2 "nonimmediate_operand" "")]
3270
  "TARGET_SSE2"
3271
{
3272
  rtx r1, r2;
3273
 
3274
  r1 = gen_reg_rtx (V4SImode);
3275
  r2 = gen_reg_rtx (V4SImode);
3276
 
3277
  emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3278
  emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3279
  emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3280
                                         gen_lowpart (V2DImode, r1),
3281
                                         gen_lowpart (V2DImode, r2)));
3282
  DONE;
3283
})
3284
 
3285
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3286
;;
3287
;; Parallel single-precision floating point element swizzling
3288
;;
3289
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3290
 
3291
;; Expander for the movhlps shuffle: the result is elements {6,7,2,3}
;; of the concatenation of operands 1 and 2, i.e. elements 2,3 of
;; operand 2 followed by elements 2,3 of operand 1.  The operands are
;; passed through ix86_fixup_binary_operands before the pattern is
;; emitted.
(define_expand "sse_movhlps_exp"
3292
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3293
        (vec_select:V4SF
3294
          (vec_concat:V8SF
3295
            (match_operand:V4SF 1 "nonimmediate_operand" "")
3296
            (match_operand:V4SF 2 "nonimmediate_operand" ""))
3297
          (parallel [(const_int 6)
3298
                     (const_int 7)
3299
                     (const_int 2)
3300
                     (const_int 3)])))]
3301
  "TARGET_SSE"
3302
  "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
3303
 
3304
;; AVX (three-operand) form of the movhlps shuffle, selecting elements
;; {6,7,2,3} of concat (op1, op2).  Alternatives:
;;   0: all registers              -> vmovhlps
;;   1: op2 in offsettable memory  -> vmovlps from %H2
;;   2: memory destination tied to op1 -> vmovhps store of op2
;; The insn condition rejects the case where both inputs are memory.
(define_insn "*avx_movhlps"
3305
  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,m")
3306
        (vec_select:V4SF
3307
          (vec_concat:V8SF
3308
            (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3309
            (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3310
          (parallel [(const_int 6)
3311
                     (const_int 7)
3312
                     (const_int 2)
3313
                     (const_int 3)])))]
3314
  "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3315
  "@
3316
   vmovhlps\t{%2, %1, %0|%0, %1, %2}
3317
   vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3318
   vmovhps\t{%2, %0|%0, %2}"
3319
  [(set_attr "type" "ssemov")
3320
   (set_attr "prefix" "vex")
3321
   (set_attr "mode" "V4SF,V2SF,V2SF")])
3322
 
3323
;; Legacy SSE (two-operand) form of the movhlps shuffle, selecting
;; elements {6,7,2,3} of concat (op1, op2).  Operand 1 is tied to the
;; destination in every alternative.  Alternatives:
;;   0: register op2               -> movhlps
;;   1: op2 in offsettable memory  -> movlps from %H2
;;   2: memory destination         -> movhps store of op2
;; The insn condition rejects the case where both inputs are memory.
(define_insn "sse_movhlps"
3324
  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,m")
3325
        (vec_select:V4SF
3326
          (vec_concat:V8SF
3327
            (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3328
            (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
3329
          (parallel [(const_int 6)
3330
                     (const_int 7)
3331
                     (const_int 2)
3332
                     (const_int 3)])))]
3333
  "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3334
  "@
3335
   movhlps\t{%2, %0|%0, %2}
3336
   movlps\t{%H2, %0|%0, %H2}
3337
   movhps\t{%2, %0|%0, %2}"
3338
  [(set_attr "type" "ssemov")
3339
   (set_attr "mode" "V4SF,V2SF,V2SF")])
3340
 
3341
;; Expander for the movlhps shuffle: the result is elements {0,1,4,5}
;; of the concatenation of operands 1 and 2, i.e. elements 0,1 of
;; operand 1 followed by elements 0,1 of operand 2.  The operands are
;; passed through ix86_fixup_binary_operands before the pattern is
;; emitted.
(define_expand "sse_movlhps_exp"
3342
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3343
        (vec_select:V4SF
3344
          (vec_concat:V8SF
3345
            (match_operand:V4SF 1 "nonimmediate_operand" "")
3346
            (match_operand:V4SF 2 "nonimmediate_operand" ""))
3347
          (parallel [(const_int 0)
3348
                     (const_int 1)
3349
                     (const_int 4)
3350
                     (const_int 5)])))]
3351
  "TARGET_SSE"
3352
  "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
3353
 
3354
;; AVX (three-operand) form of the movlhps shuffle, selecting elements
;; {0,1,4,5} of concat (op1, op2).  Alternatives:
;;   0: all registers                        -> vmovlhps
;;   1: op2 in memory                        -> vmovhps load
;;   2: offsettable memory destination tied to op1 -> vmovlps store to %H0
;; Operand validity is checked with ix86_binary_operator_ok.
(define_insn "*avx_movlhps"
3355
  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,o")
3356
        (vec_select:V4SF
3357
          (vec_concat:V8SF
3358
            (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
3359
            (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3360
          (parallel [(const_int 0)
3361
                     (const_int 1)
3362
                     (const_int 4)
3363
                     (const_int 5)])))]
3364
  "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3365
  "@
3366
   vmovlhps\t{%2, %1, %0|%0, %1, %2}
3367
   vmovhps\t{%2, %1, %0|%0, %1, %2}
3368
   vmovlps\t{%2, %H0|%H0, %2}"
3369
  [(set_attr "type" "ssemov")
3370
   (set_attr "prefix" "vex")
3371
   (set_attr "mode" "V4SF,V2SF,V2SF")])
3372
 
3373
;; Legacy SSE (two-operand) form of the movlhps shuffle, selecting
;; elements {0,1,4,5} of concat (op1, op2).  Operand 1 is tied to the
;; destination in every alternative.  Alternatives:
;;   0: register op2                  -> movlhps
;;   1: op2 in memory                 -> movhps load
;;   2: offsettable memory destination -> movlps store to %H0
;; Operand validity is checked with ix86_binary_operator_ok.
(define_insn "sse_movlhps"
3374
  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,o")
3375
        (vec_select:V4SF
3376
          (vec_concat:V8SF
3377
            (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
3378
            (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
3379
          (parallel [(const_int 0)
3380
                     (const_int 1)
3381
                     (const_int 4)
3382
                     (const_int 5)])))]
3383
  "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3384
  "@
3385
   movlhps\t{%2, %0|%0, %2}
3386
   movhps\t{%2, %0|%0, %2}
3387
   movlps\t{%2, %H0|%H0, %2}"
3388
  [(set_attr "type" "ssemov")
3389
   (set_attr "mode" "V4SF,V2SF,V2SF")])
3390
 
3391
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
3392
;; 256-bit vunpckhps.  With op1 occupying concat indices 0-7 and op2
;; indices 8-15, the result interleaves elements 2,3 and 6,7 of the two
;; inputs: {op1[2], op2[2], op1[3], op2[3], op1[6], op2[6], op1[7], op2[7]}.
(define_insn "avx_unpckhps256"
3393
  [(set (match_operand:V8SF 0 "register_operand" "=x")
3394
        (vec_select:V8SF
3395
          (vec_concat:V16SF
3396
            (match_operand:V8SF 1 "register_operand" "x")
3397
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3398
          (parallel [(const_int 2) (const_int 10)
3399
                     (const_int 3) (const_int 11)
3400
                     (const_int 6) (const_int 14)
3401
                     (const_int 7) (const_int 15)])))]
3402
  "TARGET_AVX"
3403
  "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3404
  [(set_attr "type" "sselog")
3405
   (set_attr "prefix" "vex")
3406
   (set_attr "mode" "V8SF")])
3407
 
3408
;; AVX three-operand 128-bit vunpckhps: the result is
;; {op1[2], op2[2], op1[3], op2[3]} (concat indices 2,6,3,7).
(define_insn "*avx_interleave_highv4sf"
3409
  [(set (match_operand:V4SF 0 "register_operand" "=x")
3410
        (vec_select:V4SF
3411
          (vec_concat:V8SF
3412
            (match_operand:V4SF 1 "register_operand" "x")
3413
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3414
          (parallel [(const_int 2) (const_int 6)
3415
                     (const_int 3) (const_int 7)])))]
3416
  "TARGET_AVX"
3417
  "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3418
  [(set_attr "type" "sselog")
3419
   (set_attr "prefix" "vex")
3420
   (set_attr "mode" "V4SF")])
3421
 
3422
;; Legacy SSE unpckhps: the result is {op1[2], op2[2], op1[3], op2[3]}
;; (concat indices 2,6,3,7).  Operand 1 is tied to the destination.
(define_insn "vec_interleave_highv4sf"
3423
  [(set (match_operand:V4SF 0 "register_operand" "=x")
3424
        (vec_select:V4SF
3425
          (vec_concat:V8SF
3426
            (match_operand:V4SF 1 "register_operand" "0")
3427
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3428
          (parallel [(const_int 2) (const_int 6)
3429
                     (const_int 3) (const_int 7)])))]
3430
  "TARGET_SSE"
3431
  "unpckhps\t{%2, %0|%0, %2}"
3432
  [(set_attr "type" "sselog")
3433
   (set_attr "mode" "V4SF")])
3434
 
3435
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
3436
;; 256-bit vunpcklps.  With op1 occupying concat indices 0-7 and op2
;; indices 8-15, the result interleaves elements 0,1 and 4,5 of the two
;; inputs: {op1[0], op2[0], op1[1], op2[1], op1[4], op2[4], op1[5], op2[5]}.
(define_insn "avx_unpcklps256"
3437
  [(set (match_operand:V8SF 0 "register_operand" "=x")
3438
        (vec_select:V8SF
3439
          (vec_concat:V16SF
3440
            (match_operand:V8SF 1 "register_operand" "x")
3441
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3442
          (parallel [(const_int 0) (const_int 8)
3443
                     (const_int 1) (const_int 9)
3444
                     (const_int 4) (const_int 12)
3445
                     (const_int 5) (const_int 13)])))]
3446
  "TARGET_AVX"
3447
  "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3448
  [(set_attr "type" "sselog")
3449
   (set_attr "prefix" "vex")
3450
   (set_attr "mode" "V8SF")])
3451
 
3452
;; AVX three-operand 128-bit vunpcklps: the result is
;; {op1[0], op2[0], op1[1], op2[1]} (concat indices 0,4,1,5).
(define_insn "*avx_interleave_lowv4sf"
3453
  [(set (match_operand:V4SF 0 "register_operand" "=x")
3454
        (vec_select:V4SF
3455
          (vec_concat:V8SF
3456
            (match_operand:V4SF 1 "register_operand" "x")
3457
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3458
          (parallel [(const_int 0) (const_int 4)
3459
                     (const_int 1) (const_int 5)])))]
3460
  "TARGET_AVX"
3461
  "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3462
  [(set_attr "type" "sselog")
3463
   (set_attr "prefix" "vex")
3464
   (set_attr "mode" "V4SF")])
3465
 
3466
;; Legacy SSE unpcklps: the result is {op1[0], op2[0], op1[1], op2[1]}
;; (concat indices 0,4,1,5).  Operand 1 is tied to the destination.
(define_insn "vec_interleave_lowv4sf"
3467
  [(set (match_operand:V4SF 0 "register_operand" "=x")
3468
        (vec_select:V4SF
3469
          (vec_concat:V8SF
3470
            (match_operand:V4SF 1 "register_operand" "0")
3471
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3472
          (parallel [(const_int 0) (const_int 4)
3473
                     (const_int 1) (const_int 5)])))]
3474
  "TARGET_SSE"
3475
  "unpcklps\t{%2, %0|%0, %2}"
3476
  [(set_attr "type" "sselog")
3477
   (set_attr "mode" "V4SF")])
3478
 
3479
;; These are modeled with the same vec_concat as the others so that we
3480
;; capture users of shufps that can use the new instructions
3481
;; 256-bit vmovshdup: duplicates the odd-indexed elements of operand 1,
;; giving {op1[1], op1[1], op1[3], op1[3], op1[5], op1[5], op1[7], op1[7]}.
;; The input is concatenated with itself (match_dup 1); every selected
;; index here falls in the first copy.
(define_insn "avx_movshdup256"
3482
  [(set (match_operand:V8SF 0 "register_operand" "=x")
3483
        (vec_select:V8SF
3484
          (vec_concat:V16SF
3485
            (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3486
            (match_dup 1))
3487
          (parallel [(const_int 1) (const_int 1)
3488
                     (const_int 3) (const_int 3)
3489
                     (const_int 5) (const_int 5)
3490
                     (const_int 7) (const_int 7)])))]
3491
  "TARGET_AVX"
3492
  "vmovshdup\t{%1, %0|%0, %1}"
3493
  [(set_attr "type" "sse")
3494
   (set_attr "prefix" "vex")
3495
   (set_attr "mode" "V8SF")])
3496
 
3497
;; 128-bit movshdup: the result is {op1[1], op1[1], op1[3], op1[3]}.
;; The input is concatenated with itself, so index 7 refers to element 3
;; of the second copy, which is the same value as op1[3].  The %v prefix
;; together with "maybe_vex" lets one pattern serve both the SSE3 and
;; the AVX encodings.
(define_insn "sse3_movshdup"
3498
  [(set (match_operand:V4SF 0 "register_operand" "=x")
3499
        (vec_select:V4SF
3500
          (vec_concat:V8SF
3501
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3502
            (match_dup 1))
3503
          (parallel [(const_int 1)
3504
                     (const_int 1)
3505
                     (const_int 7)
3506
                     (const_int 7)])))]
3507
  "TARGET_SSE3"
3508
  "%vmovshdup\t{%1, %0|%0, %1}"
3509
  [(set_attr "type" "sse")
3510
   (set_attr "prefix_rep" "1")
3511
   (set_attr "prefix" "maybe_vex")
3512
   (set_attr "mode" "V4SF")])
3513
 
3514
;; 256-bit vmovsldup: duplicates the even-indexed elements of operand 1,
;; giving {op1[0], op1[0], op1[2], op1[2], op1[4], op1[4], op1[6], op1[6]}.
;; The input is concatenated with itself (match_dup 1); every selected
;; index here falls in the first copy.
(define_insn "avx_movsldup256"
3515
  [(set (match_operand:V8SF 0 "register_operand" "=x")
3516
        (vec_select:V8SF
3517
          (vec_concat:V16SF
3518
            (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3519
            (match_dup 1))
3520
          (parallel [(const_int 0) (const_int 0)
3521
                     (const_int 2) (const_int 2)
3522
                     (const_int 4) (const_int 4)
3523
                     (const_int 6) (const_int 6)])))]
3524
  "TARGET_AVX"
3525
  "vmovsldup\t{%1, %0|%0, %1}"
3526
  [(set_attr "type" "sse")
3527
   (set_attr "prefix" "vex")
3528
   (set_attr "mode" "V8SF")])
3529
 
3530
;; 128-bit movsldup: the result is {op1[0], op1[0], op1[2], op1[2]}.
;; The input is concatenated with itself, so index 6 refers to element 2
;; of the second copy, which is the same value as op1[2].  The %v prefix
;; together with "maybe_vex" lets one pattern serve both the SSE3 and
;; the AVX encodings.
(define_insn "sse3_movsldup"
3531
  [(set (match_operand:V4SF 0 "register_operand" "=x")
3532
        (vec_select:V4SF
3533
          (vec_concat:V8SF
3534
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3535
            (match_dup 1))
3536
          (parallel [(const_int 0)
3537
                     (const_int 0)
3538
                     (const_int 6)
3539
                     (const_int 6)])))]
3540
  "TARGET_SSE3"
3541
  "%vmovsldup\t{%1, %0|%0, %1}"
3542
  [(set_attr "type" "sse")
3543
   (set_attr "prefix_rep" "1")
3544
   (set_attr "prefix" "maybe_vex")
3545
   (set_attr "mode" "V4SF")])
3546
 
3547
;; Expander taking the vshufps immediate as operand 3 and decoding it
;; into the eight explicit element indices expected by avx_shufps256_1.
;; The four 2-bit fields of the mask are used twice: once unchanged
;; (fields 0,1) or offset by 8 (fields 2,3) for result elements 0-3,
;; and once offset by 4 or 12 for result elements 4-7.
(define_expand "avx_shufps256"
3548
  [(match_operand:V8SF 0 "register_operand" "")
3549
   (match_operand:V8SF 1 "register_operand" "")
3550
   (match_operand:V8SF 2 "nonimmediate_operand" "")
3551
   (match_operand:SI 3 "const_int_operand" "")]
3552
  "TARGET_AVX"
3553
{
3554
  int mask = INTVAL (operands[3]);
3555
  emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3556
                                  GEN_INT ((mask >> 0) & 3),
3557
                                  GEN_INT ((mask >> 2) & 3),
3558
                                  GEN_INT (((mask >> 4) & 3) + 8),
3559
                                  GEN_INT (((mask >> 6) & 3) + 8),
3560
                                  GEN_INT (((mask >> 0) & 3) + 4),
3561
                                  GEN_INT (((mask >> 2) & 3) + 4),
3562
                                  GEN_INT (((mask >> 4) & 3) + 12),
3563
                                  GEN_INT (((mask >> 6) & 3) + 12)));
3564
  DONE;
3565
})
3566
 
3567
;; Each 2-bit field of the mask selects two elements, one in each 128-bit lane.
3568
;; 256-bit vshufps with the selection spelled out as eight element
;; indices (operands 3-10) into concat (op1, op2).  The insn condition
;; requires the indices for result elements 4-7 to equal those for
;; elements 0-3 plus 4, so that a single 8-bit immediate describes both
;; halves.  The output code rebuilds that immediate from operands 3-6
;; and stores it back into operands[3] for printing.
(define_insn "avx_shufps256_1"
3569
  [(set (match_operand:V8SF 0 "register_operand" "=x")
3570
        (vec_select:V8SF
3571
          (vec_concat:V16SF
3572
            (match_operand:V8SF 1 "register_operand" "x")
3573
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3574
          (parallel [(match_operand 3  "const_0_to_3_operand"   "")
3575
                     (match_operand 4  "const_0_to_3_operand"   "")
3576
                     (match_operand 5  "const_8_to_11_operand"  "")
3577
                     (match_operand 6  "const_8_to_11_operand"  "")
3578
                     (match_operand 7  "const_4_to_7_operand"   "")
3579
                     (match_operand 8  "const_4_to_7_operand"   "")
3580
                     (match_operand 9  "const_12_to_15_operand" "")
3581
                     (match_operand 10 "const_12_to_15_operand" "")])))]
3582
  "TARGET_AVX
3583
   && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3584
       && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3585
       && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3586
       && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3587
{
3588
  int mask;
3589
  /* Fields 0,1 come from the op1 indices; fields 2,3 from the op2
     indices with the offset of 8 removed.  */
  mask = INTVAL (operands[3]);
3590
  mask |= INTVAL (operands[4]) << 2;
3591
  mask |= (INTVAL (operands[5]) - 8) << 4;
3592
  mask |= (INTVAL (operands[6]) - 8) << 6;
3593
  operands[3] = GEN_INT (mask);
3594
 
3595
  return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3596
}
3597
  [(set_attr "type" "sselog")
3598
   (set_attr "length_immediate" "1")
3599
   (set_attr "prefix" "vex")
3600
   (set_attr "mode" "V8SF")])
3601
 
3602
;; Expander taking the shufps immediate as operand 3 and decoding it
;; into four explicit element indices for gen_sse_shufps_v4sf: fields
;; 0,1 index operand 1 (0-3) and fields 2,3 index operand 2 (offset by
;; 4 within the concatenation).
(define_expand "sse_shufps"
3603
  [(match_operand:V4SF 0 "register_operand" "")
3604
   (match_operand:V4SF 1 "register_operand" "")
3605
   (match_operand:V4SF 2 "nonimmediate_operand" "")
3606
   (match_operand:SI 3 "const_int_operand" "")]
3607
  "TARGET_SSE"
3608
{
3609
  int mask = INTVAL (operands[3]);
3610
  emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3611
                               GEN_INT ((mask >> 0) & 3),
3612
                               GEN_INT ((mask >> 2) & 3),
3613
                               GEN_INT (((mask >> 4) & 3) + 4),
3614
                               GEN_INT (((mask >> 6) & 3) + 4)));
3615
  DONE;
3616
})
3617
 
3618
;; AVX three-operand vshufps on a four-element vector mode (iterated
;; over SSEMODE4S).  Operands 3-6 are explicit element indices into
;; concat (op1, op2): 0-3 pick from operand 1, 4-7 from operand 2.  The
;; output code folds them back into the 8-bit immediate, which is
;; stored in operands[3] for printing.
;; The <mode> suffix and the double-width mode of the vec_concat are
;; iterator substitutions; they are required for the template to parse
;; and for each iterated mode to get a distinct pattern name.
(define_insn "*avx_shufps_<mode>"
3619
  [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3620
        (vec_select:SSEMODE4S
3621
          (vec_concat:<ssedoublesizemode>
3622
            (match_operand:SSEMODE4S 1 "register_operand" "x")
3623
            (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3624
          (parallel [(match_operand 3 "const_0_to_3_operand" "")
3625
                     (match_operand 4 "const_0_to_3_operand" "")
3626
                     (match_operand 5 "const_4_to_7_operand" "")
3627
                     (match_operand 6 "const_4_to_7_operand" "")])))]
3628
  "TARGET_AVX"
3629
{
3630
  int mask = 0;
3631
  mask |= INTVAL (operands[3]) << 0;
3632
  mask |= INTVAL (operands[4]) << 2;
3633
  mask |= (INTVAL (operands[5]) - 4) << 4;
3634
  mask |= (INTVAL (operands[6]) - 4) << 6;
3635
  operands[3] = GEN_INT (mask);
3636
 
3637
  return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3638
}
3639
  [(set_attr "type" "sselog")
3640
   (set_attr "length_immediate" "1")
3641
   (set_attr "prefix" "vex")
3642
   (set_attr "mode" "V4SF")])
3643
 
3644
;; Legacy SSE two-operand shufps on a four-element vector mode
;; (iterated over SSEMODE4S); operand 1 is tied to the destination.
;; Operands 3-6 are explicit element indices into concat (op1, op2):
;; 0-3 pick from operand 1, 4-7 from operand 2.  The output code folds
;; them back into the 8-bit immediate, stored in operands[3].
;; The pattern must be named sse_shufps_<mode>: the sse_shufps expander
;; calls gen_sse_shufps_v4sf, which only exists if the V4SF instance of
;; this pattern carries that name.
(define_insn "sse_shufps_<mode>"
3645
  [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3646
        (vec_select:SSEMODE4S
3647
          (vec_concat:<ssedoublesizemode>
3648
            (match_operand:SSEMODE4S 1 "register_operand" "0")
3649
            (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
3650
          (parallel [(match_operand 3 "const_0_to_3_operand" "")
3651
                     (match_operand 4 "const_0_to_3_operand" "")
3652
                     (match_operand 5 "const_4_to_7_operand" "")
3653
                     (match_operand 6 "const_4_to_7_operand" "")])))]
3654
  "TARGET_SSE"
3655
{
3656
  int mask = 0;
3657
  mask |= INTVAL (operands[3]) << 0;
3658
  mask |= INTVAL (operands[4]) << 2;
3659
  mask |= (INTVAL (operands[5]) - 4) << 4;
3660
  mask |= (INTVAL (operands[6]) - 4) << 6;
3661
  operands[3] = GEN_INT (mask);
3662
 
3663
  return "shufps\t{%3, %2, %0|%0, %2, %3}";
3664
}
3665
  [(set_attr "type" "sselog")
3666
   (set_attr "length_immediate" "1")
3667
   (set_attr "mode" "V4SF")])
3668
 
3669
;; Extract elements 2 and 3 of the V4SF operand 1 as a V2SF value.
;; Alternatives:
;;   0: register source, memory destination   -> movhps store
;;   1: register source, register destination -> movhlps
;;   2: offsettable memory source             -> movlps load from %H1
;; The %v prefix with "maybe_vex" covers both SSE and AVX encodings.
(define_insn "sse_storehps"
3670
  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3671
        (vec_select:V2SF
3672
          (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3673
          (parallel [(const_int 2) (const_int 3)])))]
3674
  "TARGET_SSE"
3675
  "@
3676
   %vmovhps\t{%1, %0|%0, %1}
3677
   %vmovhlps\t{%1, %d0|%d0, %1}
3678
   %vmovlps\t{%H1, %d0|%d0, %H1}"
3679
  [(set_attr "type" "ssemov")
3680
   (set_attr "prefix" "maybe_vex")
3681
   (set_attr "mode" "V2SF,V4SF,V2SF")])
3682
 
3683
;; Expander for "load high": the result keeps elements 0,1 of the V4SF
;; operand 1 and takes the V2SF operand 2 as its upper two elements.
;; The operands are passed through ix86_fixup_binary_operands before
;; the pattern is emitted.
(define_expand "sse_loadhps_exp"
3684
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3685
        (vec_concat:V4SF
3686
          (vec_select:V2SF
3687
            (match_operand:V4SF 1 "nonimmediate_operand" "")
3688
            (parallel [(const_int 0) (const_int 1)]))
3689
          (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3690
  "TARGET_SSE"
3691
  "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
3692
 
3693
;; AVX form of "load high": result = {op1[0], op1[1], op2[0], op2[1]}.
;; Alternatives:
;;   0: op2 in memory                              -> vmovhps load
;;   1: op2 in a register                          -> vmovlhps
;;   2: offsettable memory destination tied to op1 -> vmovlps store to %H0
(define_insn "*avx_loadhps"
3694
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3695
        (vec_concat:V4SF
3696
          (vec_select:V2SF
3697
            (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3698
            (parallel [(const_int 0) (const_int 1)]))
3699
          (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3700
  "TARGET_AVX"
3701
  "@
3702
   vmovhps\t{%2, %1, %0|%0, %1, %2}
3703
   vmovlhps\t{%2, %1, %0|%0, %1, %2}
3704
   vmovlps\t{%2, %H0|%H0, %2}"
3705
  [(set_attr "type" "ssemov")
3706
   (set_attr "prefix" "vex")
3707
   (set_attr "mode" "V2SF,V4SF,V2SF")])
3708
 
3709
;; Legacy SSE form of "load high": result = {op1[0], op1[1], op2[0],
;; op2[1]}, with operand 1 tied to the destination.  Alternatives:
;;   0: op2 in memory                  -> movhps load
;;   1: op2 in a register              -> movlhps
;;   2: offsettable memory destination -> movlps store to %H0
(define_insn "sse_loadhps"
3710
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
3711
        (vec_concat:V4SF
3712
          (vec_select:V2SF
3713
            (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
3714
            (parallel [(const_int 0) (const_int 1)]))
3715
          (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
3716
  "TARGET_SSE"
3717
  "@
3718
   movhps\t{%2, %0|%0, %2}
3719
   movlhps\t{%2, %0|%0, %2}
3720
   movlps\t{%2, %H0|%H0, %2}"
3721
  [(set_attr "type" "ssemov")
3722
   (set_attr "mode" "V2SF,V4SF,V2SF")])
3723
 
3724
;; AVX form of "store low": extract elements 0 and 1 of the V4SF
;; operand 1 as a V2SF value.  Alternatives:
;;   0: register source, memory destination   -> vmovlps store
;;   1: register source, register destination -> vmovaps
;;   2: memory source                         -> vmovlps load
;; The "mode" attribute of alternative 1 is V4SF: vmovaps is a
;; packed-single move, matching the value used by sse_storelps.
(define_insn "*avx_storelps"
3725
  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3726
        (vec_select:V2SF
3727
          (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3728
          (parallel [(const_int 0) (const_int 1)])))]
3729
  "TARGET_AVX"
3730
  "@
3731
   vmovlps\t{%1, %0|%0, %1}
3732
   vmovaps\t{%1, %0|%0, %1}
3733
   vmovlps\t{%1, %0, %0|%0, %0, %1}"
3734
  [(set_attr "type" "ssemov")
3735
   (set_attr "prefix" "vex")
3736
   (set_attr "mode" "V2SF,V4SF,V2SF")])
3737
 
3738
;; Legacy SSE form of "store low": extract elements 0 and 1 of the V4SF
;; operand 1 as a V2SF value.  Alternatives:
;;   0: register source, memory destination   -> movlps store
;;   1: register source, register destination -> movaps
;;   2: memory source                         -> movlps load
(define_insn "sse_storelps"
3739
  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3740
        (vec_select:V2SF
3741
          (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
3742
          (parallel [(const_int 0) (const_int 1)])))]
3743
  "TARGET_SSE"
3744
  "@
3745
   movlps\t{%1, %0|%0, %1}
3746
   movaps\t{%1, %0|%0, %1}
3747
   movlps\t{%1, %0|%0, %1}"
3748
  [(set_attr "type" "ssemov")
3749
   (set_attr "mode" "V2SF,V4SF,V2SF")])
3750
 
3751
;; Expander for "load low": the result takes the V2SF operand 2 as its
;; lower two elements and keeps elements 2,3 of the V4SF operand 1.
;; The operands are passed through ix86_fixup_binary_operands before
;; the pattern is emitted.
(define_expand "sse_loadlps_exp"
3752
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3753
        (vec_concat:V4SF
3754
          (match_operand:V2SF 2 "nonimmediate_operand" "")
3755
          (vec_select:V2SF
3756
            (match_operand:V4SF 1 "nonimmediate_operand" "")
3757
            (parallel [(const_int 2) (const_int 3)]))))]
3758
  "TARGET_SSE"
3759
  "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
3760
 
3761
;; AVX form of "load low": result = {op2[0], op2[1], op1[2], op1[3]}.
;; Alternatives:
;;   0: all registers                  -> vshufps with immediate 0xe4
;;   1: op2 in memory                  -> vmovlps load
;;   2: memory destination tied to op1 -> vmovlps store
;; Alternative 0 must use the VEX mnemonic: the pattern is TARGET_AVX
;; only, is marked "prefix" "vex", and uses the three-operand form,
;; which the legacy shufps encoding does not have.
(define_insn "*avx_loadlps"
3762
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3763
        (vec_concat:V4SF
3764
          (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
3765
          (vec_select:V2SF
3766
            (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
3767
            (parallel [(const_int 2) (const_int 3)]))))]
3768
  "TARGET_AVX"
3769
  "@
3770
   vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3771
   vmovlps\t{%2, %1, %0|%0, %1, %2}
3772
   vmovlps\t{%2, %0|%0, %2}"
3773
  [(set_attr "type" "sselog,ssemov,ssemov")
3774
   (set_attr "length_immediate" "1,*,*")
3775
   (set_attr "prefix" "vex")
3776
   (set_attr "mode" "V4SF,V2SF,V2SF")])
3777
 
3778
;; Legacy SSE form of "load low": result = {op2[0], op2[1], op1[2],
;; op1[3]}.  Alternatives:
;;   0: op2 tied to the destination, op1 in a register -> shufps 0xe4
;;   1: op1 tied to the destination, op2 in memory     -> movlps load
;;   2: memory destination tied to op1                 -> movlps store
(define_insn "sse_loadlps"
3779
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3780
        (vec_concat:V4SF
3781
          (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
3782
          (vec_select:V2SF
3783
            (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
3784
            (parallel [(const_int 2) (const_int 3)]))))]
3785
  "TARGET_SSE"
3786
  "@
3787
   shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3788
   movlps\t{%2, %0|%0, %2}
3789
   movlps\t{%2, %0|%0, %2}"
3790
  [(set_attr "type" "sselog,ssemov,ssemov")
3791
   (set_attr "length_immediate" "1,*,*")
3792
   (set_attr "mode" "V4SF,V2SF,V2SF")])
3793
 
3794
;; AVX three-operand vmovss: vec_merge with mask 1 takes element 0 from
;; operand 2 and the remaining elements from operand 1.  Note that
;; operand 2 appears before operand 1 in the RTL.
(define_insn "*avx_movss"
3795
  [(set (match_operand:V4SF 0 "register_operand" "=x")
3796
        (vec_merge:V4SF
3797
          (match_operand:V4SF 2 "register_operand" "x")
3798
          (match_operand:V4SF 1 "register_operand" "x")
3799
          (const_int 1)))]
3800
  "TARGET_AVX"
3801
  "vmovss\t{%2, %1, %0|%0, %1, %2}"
3802
  [(set_attr "type" "ssemov")
3803
   (set_attr "prefix" "vex")
3804
   (set_attr "mode" "SF")])
3805
 
3806
;; Legacy SSE movss: vec_merge with mask 1 takes element 0 from
;; operand 2 and the remaining elements from operand 1, which is tied
;; to the destination.
(define_insn "sse_movss"
3807
  [(set (match_operand:V4SF 0 "register_operand" "=x")
3808
        (vec_merge:V4SF
3809
          (match_operand:V4SF 2 "register_operand" "x")
3810
          (match_operand:V4SF 1 "register_operand" "0")
3811
          (const_int 1)))]
3812
  "TARGET_SSE"
3813
  "movss\t{%2, %0|%0, %2}"
3814
  [(set_attr "type" "ssemov")
3815
   (set_attr "mode" "SF")])
3816
 
3817
;; Expander: broadcast the SFmode operand 1 into all four elements of
;; the V4SF operand 0.  Without AVX the only matching insn
;; (*vec_dupv4sf) needs a register input, so a memory operand is loaded
;; into a register first.  The register must be SFmode, the mode of
;; operand 1; forcing it into V4SFmode would make the vec_duplicate
;; source a vector instead of a scalar.
(define_expand "vec_dupv4sf"
3818
  [(set (match_operand:V4SF 0 "register_operand" "")
3819
        (vec_duplicate:V4SF
3820
          (match_operand:SF 1 "nonimmediate_operand" "")))]
3821
  "TARGET_SSE"
3822
{
3823
  if (!TARGET_AVX)
3824
    operands[1] = force_reg (SFmode, operands[1]);
3825
})
3826
 
3827
;; AVX broadcast of an SFmode scalar into a V4SF register.
;; Alternatives:
;;   0: register source -> vshufps with immediate 0
;;   1: memory source   -> vbroadcastss
(define_insn "*vec_dupv4sf_avx"
3828
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3829
        (vec_duplicate:V4SF
3830
          (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3831
  "TARGET_AVX"
3832
  "@
3833
   vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3834
   vbroadcastss\t{%1, %0|%0, %1}"
3835
  [(set_attr "type" "sselog1,ssemov")
3836
   (set_attr "length_immediate" "1,0")
3837
   (set_attr "prefix_extra" "0,1")
3838
   (set_attr "prefix" "vex")
3839
   (set_attr "mode" "V4SF")])
3840
 
3841
;; Legacy SSE broadcast of an SFmode scalar into a V4SF register.  The
;; scalar must already live in the destination register (constraint
;; "0"); shufps with immediate 0 then replicates element 0.
(define_insn "*vec_dupv4sf"
3842
  [(set (match_operand:V4SF 0 "register_operand" "=x")
3843
        (vec_duplicate:V4SF
3844
          (match_operand:SF 1 "register_operand" "0")))]
3845
  "TARGET_SSE"
3846
  "shufps\t{$0, %0, %0|%0, %0, 0}"
3847
  [(set_attr "type" "sselog1")
3848
   (set_attr "length_immediate" "1")
3849
   (set_attr "mode" "V4SF")])
3850
 
3851
;; AVX form of building a V2SF from two SFmode scalars.  Alternatives:
;;   0: both in SSE registers       -> vunpcklps
;;   1: op2 in memory               -> vinsertps with immediate 0x10
;;   2: op1 in memory, op2 matching "C" -> vmovss load
;;   3: MMX registers               -> punpckldq
;;   4: MMX destination, op1 in memory, op2 matching "C" -> movd
;; The two MMX alternatives keep the original (non-VEX) encoding via
;; the "prefix" attribute.
(define_insn "*vec_concatv2sf_avx"
3852
  [(set (match_operand:V2SF 0 "register_operand"     "=x,x,x,*y ,*y")
3853
        (vec_concat:V2SF
3854
          (match_operand:SF 1 "nonimmediate_operand" " x,x,m, x , m")
3855
          (match_operand:SF 2 "vector_move_operand"  " x,m,C,*ym, C")))]
3856
  "TARGET_AVX"
3857
  "@
3858
   vunpcklps\t{%2, %1, %0|%0, %1, %2}
3859
   vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3860
   vmovss\t{%1, %0|%0, %1}
3861
   punpckldq\t{%2, %0|%0, %2}
3862
   movd\t{%1, %0|%0, %1}"
3863
  [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3864
   (set_attr "length_immediate" "*,1,*,*,*")
3865
   (set_attr "prefix_extra" "*,1,*,*,*")
3866
   (set (attr "prefix")
3867
     (if_then_else (eq_attr "alternative" "3,4")
3868
       (const_string "orig")
3869
       (const_string "vex")))
3870
   (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3871
 
3872
;; Although insertps takes register source, we prefer
3873
;; unpcklps with register source since it is shorter.
3874
;; SSE4.1 form of building a V2SF from two SFmode scalars; operand 1 is
;; tied to the destination in the register alternatives.  Alternatives:
;;   0: op2 in an SSE register      -> unpcklps
;;   1: op2 in memory               -> insertps with immediate 0x10
;;   2: op1 in memory, op2 matching "C" -> movss load
;;   3: MMX registers               -> punpckldq
;;   4: MMX destination, op1 in memory, op2 matching "C" -> movd
(define_insn "*vec_concatv2sf_sse4_1"
3875
  [(set (match_operand:V2SF 0 "register_operand"     "=x,x,x,*y ,*y")
3876
        (vec_concat:V2SF
3877
          (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
3878
          (match_operand:SF 2 "vector_move_operand"  " x,m,C,*ym, C")))]
3879
  "TARGET_SSE4_1"
3880
  "@
3881
   unpcklps\t{%2, %0|%0, %2}
3882
   insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3883
   movss\t{%1, %0|%0, %1}
3884
   punpckldq\t{%2, %0|%0, %2}
3885
   movd\t{%1, %0|%0, %1}"
3886
  [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3887
   (set_attr "prefix_data16" "*,1,*,*,*")
3888
   (set_attr "prefix_extra" "*,1,*,*,*")
3889
   (set_attr "length_immediate" "*,1,*,*,*")
3890
   (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3891
 
3892
;; ??? In theory we can match memory for the MMX alternative, but allowing
3893
;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3894
;; alternatives pretty much forces the MMX alternative to be chosen.
3895
;; Plain SSE form of building a V2SF from two SFmode scalars.  Operand 2
;; is restricted to a register or zero (reg_or_0_operand).
;; Alternatives:
;;   0: SSE registers, op1 tied to the destination -> unpcklps
;;   1: op1 in memory, op2 matching "C"            -> movss load
;;   2: MMX registers, op1 tied to the destination -> punpckldq
;;   3: MMX destination, op1 in memory, op2 matching "C" -> movd
(define_insn "*vec_concatv2sf_sse"
3896
  [(set (match_operand:V2SF 0 "register_operand"     "=x,x,*y,*y")
3897
        (vec_concat:V2SF
3898
          (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3899
          (match_operand:SF 2 "reg_or_0_operand"     " x,C,*y, C")))]
3900
  "TARGET_SSE"
3901
  "@
3902
   unpcklps\t{%2, %0|%0, %2}
3903
   movss\t{%1, %0|%0, %1}
3904
   punpckldq\t{%2, %0|%0, %2}
3905
   movd\t{%1, %0|%0, %1}"
3906
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3907
   (set_attr "mode" "V4SF,SF,DI,DI")])
3908
 
3909
;; AVX form of concatenating two V2SF values into a V4SF.
;; Alternatives:
;;   0: op2 in a register -> vmovlhps
;;   1: op2 in memory     -> vmovhps load
(define_insn "*vec_concatv4sf_avx"
3910
  [(set (match_operand:V4SF 0 "register_operand"   "=x,x")
3911
        (vec_concat:V4SF
3912
          (match_operand:V2SF 1 "register_operand" " x,x")
3913
          (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3914
  "TARGET_AVX"
3915
  "@
3916
   vmovlhps\t{%2, %1, %0|%0, %1, %2}
3917
   vmovhps\t{%2, %1, %0|%0, %1, %2}"
3918
  [(set_attr "type" "ssemov")
3919
   (set_attr "prefix" "vex")
3920
   (set_attr "mode" "V4SF,V2SF")])
3921
 
3922
;; Legacy SSE form of concatenating two V2SF values into a V4SF;
;; operand 1 is tied to the destination.  Alternatives:
;;   0: op2 in a register -> movlhps
;;   1: op2 in memory     -> movhps load
(define_insn "*vec_concatv4sf_sse"
3923
  [(set (match_operand:V4SF 0 "register_operand"   "=x,x")
3924
        (vec_concat:V4SF
3925
          (match_operand:V2SF 1 "register_operand" " 0,0")
3926
          (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
3927
  "TARGET_SSE"
3928
  "@
3929
   movlhps\t{%2, %0|%0, %2}
3930
   movhps\t{%2, %0|%0, %2}"
3931
  [(set_attr "type" "ssemov")
3932
   (set_attr "mode" "V4SF,V2SF")])
3933
 
3934
;; Standard vec_init expander for every 16-byte SSE vector mode
;; (iterated over SSEMODE).  All the work is delegated to
;; ix86_expand_vector_init, called with operand 0 as the target and
;; operand 1 as the initializer.
;; The <mode> suffix gives each iterated mode its own pattern name;
;; without it every instance would be called "vec_init".
(define_expand "vec_init<mode>"
3935
  [(match_operand:SSEMODE 0 "register_operand" "")
3936
   (match_operand 1 "" "")]
3937
  "TARGET_SSE"
3938
{
3939
  ix86_expand_vector_init (false, operands[0], operands[1]);
3940
  DONE;
3941
})
3942
 
3943
;; AVX form of setting element 0 of a four-element vector (iterated
;; over SSEMODE4S) from a scalar: vec_merge with mask 1 takes element 0
;; from the duplicated operand 2 and the rest from operand 1.
;; Alternatives 0-2 have operand 1 matching "C" (vinsertps / scalar
;; load / vmovd), alternatives 3-4 merge into a register operand 1
;; (vmovss / vpinsrd), and alternative 5 (memory destination) emits
;; "#" and is left to a splitter.
;; Operand 2 has the scalar mode of the vector, and the scalar-load
;; mnemonic suffix and second "mode" attribute entry depend on the
;; iterated mode, hence the <...> substitutions.
(define_insn "*vec_set<mode>_0_avx"
3944
  [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand"  "=x,x, x,x,  x,m")
3945
        (vec_merge:SSEMODE4S
3946
          (vec_duplicate:SSEMODE4S
3947
            (match_operand:<ssescalarmode> 2
3948
              "general_operand"                            " x,m,*r,x,*rm,x*rfF"))
3949
          (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,x,  x,0")
3950
          (const_int 1)))]
3951
  "TARGET_AVX"
3952
  "@
3953
   vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
3954
   vmov<ssescalarmodesuffix2s>\t{%2, %0|%0, %2}
3955
   vmovd\t{%2, %0|%0, %2}
3956
   vmovss\t{%2, %1, %0|%0, %1, %2}
3957
   vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3958
   #"
3959
  [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
3960
   (set_attr "prefix_extra" "*,*,*,*,1,*")
3961
   (set_attr "length_immediate" "*,*,*,*,1,*")
3962
   (set_attr "prefix" "vex")
3963
   (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
3964
 
3965
;; SSE4.1 form of setting element 0 of a four-element vector (iterated
;; over SSEMODE4S) from a scalar: vec_merge with mask 1 takes element 0
;; from the duplicated operand 2 and the rest from operand 1.
;; Alternatives 0-2 have operand 1 matching "C" (insertps / scalar
;; load / movd), alternatives 3-4 merge into operand 1 tied to the
;; destination (movss / pinsrd), and alternative 5 (memory destination)
;; emits "#" and is left to a splitter.
;; Operand 2 has the scalar mode of the vector, and the scalar-load
;; mnemonic suffix and second "mode" attribute entry depend on the
;; iterated mode, hence the <...> substitutions.
(define_insn "*vec_set<mode>_0_sse4_1"
3966
  [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand"  "=x,x, x,x,  x,m")
3967
        (vec_merge:SSEMODE4S
3968
          (vec_duplicate:SSEMODE4S
3969
            (match_operand:<ssescalarmode> 2
3970
              "general_operand"                            " x,m,*r,x,*rm,*rfF"))
3971
          (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,0,  0,0")
3972
          (const_int 1)))]
3973
  "TARGET_SSE4_1"
3974
  "@
3975
   insertps\t{$0xe, %2, %0|%0, %2, 0xe}
3976
   mov<ssescalarmodesuffix2s>\t{%2, %0|%0, %2}
3977
   movd\t{%2, %0|%0, %2}
3978
   movss\t{%2, %0|%0, %2}
3979
   pinsrd\t{$0, %2, %0|%0, %2, 0}
3980
   #"
3981
  [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
3982
   (set_attr "prefix_extra" "*,*,*,*,1,*")
3983
   (set_attr "length_immediate" "*,*,*,*,1,*")
3984
   (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
3985
 
3986
;; SSE2: set element 0 of a SSEMODE4S vector to scalar operand 2 and take
;; the remaining elements from operand 1 (vec_merge with mask 1).
;; Alternatives 0-1 load from memory or a general register over a
;; constant operand 1 ("C"); alternative 2 merges an SSE register into
;; the destination with movss; alternative 3 (memory destination)
;; outputs "#" and must be split before final.
(define_insn "*vec_set<mode>_0_sse2"
3987
  [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand"  "=x, x,x,m")
3988
        (vec_merge:SSEMODE4S
3989
          (vec_duplicate:SSEMODE4S
3990
            (match_operand:<ssescalarmode> 2
3991
              "general_operand"                            " m,*r,x,x*rfF"))
3992
          (match_operand:SSEMODE4S 1 "vector_move_operand" " C, C,0,0")
3993
          (const_int 1)))]
3994
  "TARGET_SSE2"
3995
  "@
3996
   mov<ssescalarmodesuffix2s>\t{%2, %0|%0, %2}
3997
   movd\t{%2, %0|%0, %2}
3998
   movss\t{%2, %0|%0, %2}
3999
   #"
4000
  [(set_attr "type" "ssemov")
4001
   (set_attr "mode" "<ssescalarmode>,SI,SF,*")])
4002
 
4003
;; Plain SSE fallback: set element 0 of a SSEMODE4S vector to scalar
;; operand 2 (vec_merge with mask 1).  Both register-destination
;; alternatives use movss: a load from memory over a constant operand 1,
;; or a register merge with operand 1 tied to the destination.  The
;; memory-destination alternative outputs "#" and must be split before
;; final.
(define_insn "vec_set<mode>_0"
4004
  [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand"  "=x,x,m")
4005
        (vec_merge:SSEMODE4S
4006
          (vec_duplicate:SSEMODE4S
4007
            (match_operand:<ssescalarmode> 2
4008
              "general_operand"                            " m,x,x*rfF"))
4009
          (match_operand:SSEMODE4S 1 "vector_move_operand" " C,0,0")
4010
          (const_int 1)))]
4011
  "TARGET_SSE"
4012
  "@
4013
   movss\t{%2, %0|%0, %2}
4014
   movss\t{%2, %0|%0, %2}
4015
   #"
4016
  [(set_attr "type" "ssemov")
4017
   (set_attr "mode" "SF")])
4018
 
4019
;; A subset is vec_setv4sf.
;; AVX form: insert SF operand 2 into one element of V4SF operand 1.
;; Operand 3 is a one-hot element mask (const_pow2_1_to_8_operand); the C
;; body converts it to an element index with exact_log2 and shifts that
;; index left by 4 to form the vinsertps immediate.
4020
(define_insn "*vec_setv4sf_avx"
4021
  [(set (match_operand:V4SF 0 "register_operand" "=x")
4022
        (vec_merge:V4SF
4023
          (vec_duplicate:V4SF
4024
            (match_operand:SF 2 "nonimmediate_operand" "xm"))
4025
          (match_operand:V4SF 1 "register_operand" "x")
4026
          (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4027
  "TARGET_AVX"
4028
{
4029
  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4030
  return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4031
}
4032
  [(set_attr "type" "sselog")
4033
   (set_attr "prefix_extra" "1")
4034
   (set_attr "length_immediate" "1")
4035
   (set_attr "prefix" "vex")
4036
   (set_attr "mode" "V4SF")])
4037
 
4038
;; SSE4.1 form of the V4SF element insert: operand 1 is tied to the
;; destination ("0"), so insertps takes only two register operands.
;; Operand 3 is a one-hot element mask; the C body rewrites it as
;; (element index << 4) before it is printed as the immediate.
(define_insn "*vec_setv4sf_sse4_1"
4039
  [(set (match_operand:V4SF 0 "register_operand" "=x")
4040
        (vec_merge:V4SF
4041
          (vec_duplicate:V4SF
4042
            (match_operand:SF 2 "nonimmediate_operand" "xm"))
4043
          (match_operand:V4SF 1 "register_operand" "0")
4044
          (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
4045
  "TARGET_SSE4_1"
4046
{
4047
  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
4048
  return "insertps\t{%3, %2, %0|%0, %2, %3}";
4049
}
4050
  [(set_attr "type" "sselog")
4051
   (set_attr "prefix_data16" "1")
4052
   (set_attr "prefix_extra" "1")
4053
   (set_attr "length_immediate" "1")
4054
   (set_attr "mode" "V4SF")])
4055
 
4056
;; AVX insertps with a raw 8-bit immediate, modelled as an unspec
;; (UNSPEC_INSERTPS) rather than a vec_merge.  Note the numbering:
;; operand 2 (register or memory) is the first unspec element and
;; operand 1 (register) the second.  The ";" following the output
;; template begins an empty md comment.
(define_insn "*avx_insertps"
4057
  [(set (match_operand:V4SF 0 "register_operand" "=x")
4058
        (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
4059
                      (match_operand:V4SF 1 "register_operand" "x")
4060
                      (match_operand:SI 3 "const_0_to_255_operand" "n")]
4061
                     UNSPEC_INSERTPS))]
4062
  "TARGET_AVX"
4063
  "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4064
  [(set_attr "type" "sselog")
4065
   (set_attr "prefix" "vex")
4066
   (set_attr "prefix_extra" "1")
4067
   (set_attr "length_immediate" "1")
4068
   (set_attr "mode" "V4SF")])
4069
 
4070
;; SSE4.1 insertps with a raw 8-bit immediate (UNSPEC_INSERTPS).
;; Operand 1 is tied to the destination ("0"); operand 2 must be an SSE
;; register in this pattern (register_operand, "x").  The ";" following
;; the output template begins an empty md comment.
(define_insn "sse4_1_insertps"
4071
  [(set (match_operand:V4SF 0 "register_operand" "=x")
4072
        (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
4073
                      (match_operand:V4SF 1 "register_operand" "0")
4074
                      (match_operand:SI 3 "const_0_to_255_operand" "n")]
4075
                     UNSPEC_INSERTPS))]
4076
  "TARGET_SSE4_1"
4077
  "insertps\t{%3, %2, %0|%0, %2, %3}";
4078
  [(set_attr "type" "sselog")
4079
   (set_attr "prefix_data16" "1")
4080
   (set_attr "prefix_extra" "1")
4081
   (set_attr "length_immediate" "1")
4082
   (set_attr "mode" "V4SF")])
4083
 
4084
;; After reload, replacing element 0 of a V4SF that lives in memory (the
;; merged-with vector is the destination itself, match_dup 0) by a
;; non-memory SF value becomes a single SFmode move to byte offset 0 of
;; that memory.
(define_split
4085
  [(set (match_operand:V4SF 0 "memory_operand" "")
4086
        (vec_merge:V4SF
4087
          (vec_duplicate:V4SF
4088
            (match_operand:SF 1 "nonmemory_operand" ""))
4089
          (match_dup 0)
4090
          (const_int 1)))]
4091
  "TARGET_SSE && reload_completed"
4092
  [(const_int 0)]
4093
{
4094
  emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
4095
  DONE;
4096
})
4097
 
4098
;; Element-insert expander, instantiated once per mode of the SSEMODE
;; iterator.  Operand 0 is the vector register being modified, operand 1
;; the new element value (scalar mode of the vector, <ssescalarmode>) and
;; operand 2 a constant element index.  All code is emitted by
;; ix86_expand_vector_set, hence DONE.
(define_expand "vec_set<mode>"
4099
  [(match_operand:SSEMODE 0 "register_operand" "")
4100
   (match_operand:<ssescalarmode> 1 "register_operand" "")
4101
   (match_operand 2 "const_int_operand" "")]
4102
  "TARGET_SSE"
4103
{
4104
  ix86_expand_vector_set (false, operands[0], operands[1],
4105
                          INTVAL (operands[2]));
4106
  DONE;
4107
})
4108
 
4109
;; Extract element 0 of a V4SF.  The insn itself never prints assembly
;; ("#"); after reload it is split into an ordinary SFmode move.  For a
;; register source the same hard register number is re-used in SFmode;
;; otherwise gen_lowpart is applied to the source.  The insn condition
;; rejects the memory-to-memory combination.
(define_insn_and_split "*vec_extractv4sf_0"
4110
  [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
4111
        (vec_select:SF
4112
          (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
4113
          (parallel [(const_int 0)])))]
4114
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4115
  "#"
4116
  "&& reload_completed"
4117
  [(const_int 0)]
4118
{
4119
  rtx op1 = operands[1];
4120
  if (REG_P (op1))
4121
    op1 = gen_rtx_REG (SFmode, REGNO (op1));
4122
  else
4123
    op1 = gen_lowpart (SFmode, op1);
4124
  emit_move_insn (operands[0], op1);
4125
  DONE;
4126
})
4127
 
4128
;; Expander for extracting one 128-bit half of a 256-bit AVX vector,
;; instantiated per mode of the AVX256MODE iterator.  Operand 0 has the
;; half-width vector mode (<avxhalfvecmode>); operand 2 is 0 or 1 and
;; selects the low or high half by dispatching to the matching
;; vec_extract_lo_<mode> / vec_extract_hi_<mode> generator.  Any other
;; value is impossible given the const_0_to_1_operand predicate.
(define_expand "avx_vextractf128<mode>"
4129
  [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
4130
   (match_operand:AVX256MODE 1 "register_operand" "")
4131
   (match_operand:SI 2 "const_0_to_1_operand" "")]
4132
  "TARGET_AVX"
4133
{
4134
  switch (INTVAL (operands[2]))
4135
    {
4136
    case 0:
4137
      emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
4138
      break;
4139
    case 1:
4140
      emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
4141
      break;
4142
    default:
4143
      gcc_unreachable ();
4144
    }
4145
  DONE;
4146
})
4147
 
4148
;; Low half (elements 0-1) of a four-element 256-bit vector
;; (AVX256MODE4P), written to an SSE register or stored to memory with
;; vextractf128 and immediate 0.  The result has the half-width mode
;; <avxhalfvecmode>; "<mode>" in the name keeps the per-mode
;; instantiations distinct.
(define_insn "vec_extract_lo_<mode>"
4149
  [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4150
        (vec_select:<avxhalfvecmode>
4151
          (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
4152
          (parallel [(const_int 0) (const_int 1)])))]
4153
  "TARGET_AVX"
4154
  "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4155
  [(set_attr "type" "sselog")
4156
   (set_attr "prefix_extra" "1")
4157
   (set_attr "length_immediate" "1")
4158
   (set_attr "memory" "none,store")
4159
   (set_attr "prefix" "vex")
4160
   (set_attr "mode" "V8SF")])
4161
 
4162
;; High half (elements 2-3) of a four-element 256-bit vector
;; (AVX256MODE4P), written to an SSE register or stored to memory with
;; vextractf128 and immediate 1.  The result has the half-width mode
;; <avxhalfvecmode>.
(define_insn "vec_extract_hi_<mode>"
4163
  [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4164
        (vec_select:<avxhalfvecmode>
4165
          (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
4166
          (parallel [(const_int 2) (const_int 3)])))]
4167
  "TARGET_AVX"
4168
  "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4169
  [(set_attr "type" "sselog")
4170
   (set_attr "prefix_extra" "1")
4171
   (set_attr "length_immediate" "1")
4172
   (set_attr "memory" "none,store")
4173
   (set_attr "prefix" "vex")
4174
   (set_attr "mode" "V8SF")])
4175
 
4176
;; Low half (elements 0-3) of an eight-element 256-bit vector
;; (AVX256MODE8P), written to an SSE register or stored to memory with
;; vextractf128 and immediate 0.  The result has the half-width mode
;; <avxhalfvecmode>.
(define_insn "vec_extract_lo_<mode>"
4177
  [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4178
        (vec_select:<avxhalfvecmode>
4179
          (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
4180
          (parallel [(const_int 0) (const_int 1)
4181
                     (const_int 2) (const_int 3)])))]
4182
  "TARGET_AVX"
4183
  "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4184
  [(set_attr "type" "sselog")
4185
   (set_attr "prefix_extra" "1")
4186
   (set_attr "length_immediate" "1")
4187
   (set_attr "memory" "none,store")
4188
   (set_attr "prefix" "vex")
4189
   (set_attr "mode" "V8SF")])
4190
 
4191
;; High half (elements 4-7) of an eight-element 256-bit vector
;; (AVX256MODE8P), written to an SSE register or stored to memory with
;; vextractf128 and immediate 1.  The result has the half-width mode
;; <avxhalfvecmode>.
(define_insn "vec_extract_hi_<mode>"
4192
  [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
4193
        (vec_select:<avxhalfvecmode>
4194
          (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
4195
          (parallel [(const_int 4) (const_int 5)
4196
                     (const_int 6) (const_int 7)])))]
4197
  "TARGET_AVX"
4198
  "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4199
  [(set_attr "type" "sselog")
4200
   (set_attr "prefix_extra" "1")
4201
   (set_attr "length_immediate" "1")
4202
   (set_attr "memory" "none,store")
4203
   (set_attr "prefix" "vex")
4204
   (set_attr "mode" "V8SF")])
4205
 
4206
;; Low half (elements 0-7) of a V16HI as a V8HI, to an SSE register or to
;; memory, via vextractf128 with immediate 0.
(define_insn "vec_extract_lo_v16hi"
4207
  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4208
        (vec_select:V8HI
4209
          (match_operand:V16HI 1 "register_operand" "x,x")
4210
          (parallel [(const_int 0) (const_int 1)
4211
                     (const_int 2) (const_int 3)
4212
                     (const_int 4) (const_int 5)
4213
                     (const_int 6) (const_int 7)])))]
4214
  "TARGET_AVX"
4215
  "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4216
  [(set_attr "type" "sselog")
4217
   (set_attr "prefix_extra" "1")
4218
   (set_attr "length_immediate" "1")
4219
   (set_attr "memory" "none,store")
4220
   (set_attr "prefix" "vex")
4221
   (set_attr "mode" "V8SF")])
4222
 
4223
;; High half (elements 8-15) of a V16HI as a V8HI, to an SSE register or
;; to memory, via vextractf128 with immediate 1.
(define_insn "vec_extract_hi_v16hi"
4224
  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4225
        (vec_select:V8HI
4226
          (match_operand:V16HI 1 "register_operand" "x,x")
4227
          (parallel [(const_int 8) (const_int 9)
4228
                     (const_int 10) (const_int 11)
4229
                     (const_int 12) (const_int 13)
4230
                     (const_int 14) (const_int 15)])))]
4231
  "TARGET_AVX"
4232
  "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4233
  [(set_attr "type" "sselog")
4234
   (set_attr "prefix_extra" "1")
4235
   (set_attr "length_immediate" "1")
4236
   (set_attr "memory" "none,store")
4237
   (set_attr "prefix" "vex")
4238
   (set_attr "mode" "V8SF")])
4239
 
4240
;; Low half (elements 0-15) of a V32QI as a V16QI, to an SSE register or
;; to memory, via vextractf128 with immediate 0.
(define_insn "vec_extract_lo_v32qi"
4241
  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4242
        (vec_select:V16QI
4243
          (match_operand:V32QI 1 "register_operand" "x,x")
4244
          (parallel [(const_int 0) (const_int 1)
4245
                     (const_int 2) (const_int 3)
4246
                     (const_int 4) (const_int 5)
4247
                     (const_int 6) (const_int 7)
4248
                     (const_int 8) (const_int 9)
4249
                     (const_int 10) (const_int 11)
4250
                     (const_int 12) (const_int 13)
4251
                     (const_int 14) (const_int 15)])))]
4252
  "TARGET_AVX"
4253
  "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
4254
  [(set_attr "type" "sselog")
4255
   (set_attr "prefix_extra" "1")
4256
   (set_attr "length_immediate" "1")
4257
   (set_attr "memory" "none,store")
4258
   (set_attr "prefix" "vex")
4259
   (set_attr "mode" "V8SF")])
4260
 
4261
;; High half (elements 16-31) of a V32QI as a V16QI, to an SSE register
;; or to memory, via vextractf128 with immediate 1.
(define_insn "vec_extract_hi_v32qi"
4262
  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4263
        (vec_select:V16QI
4264
          (match_operand:V32QI 1 "register_operand" "x,x")
4265
          (parallel [(const_int 16) (const_int 17)
4266
                     (const_int 18) (const_int 19)
4267
                     (const_int 20) (const_int 21)
4268
                     (const_int 22) (const_int 23)
4269
                     (const_int 24) (const_int 25)
4270
                     (const_int 26) (const_int 27)
4271
                     (const_int 28) (const_int 29)
4272
                     (const_int 30) (const_int 31)])))]
4273
  "TARGET_AVX"
4274
  "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
4275
  [(set_attr "type" "sselog")
4276
   (set_attr "prefix_extra" "1")
4277
   (set_attr "length_immediate" "1")
4278
   (set_attr "memory" "none,store")
4279
   (set_attr "prefix" "vex")
4280
   (set_attr "mode" "V8SF")])
4281
 
4282
;; Extract element operand 2 (0..3) of V4SF register operand 1 into a
;; general register or memory ("rm") with extractps.  The template is
;; written with the "%v" prefix and the insn is marked prefix
;; "maybe_vex" -- presumably so one pattern serves both the legacy and
;; the VEX encoding; confirm against the %v handling in i386.c.
(define_insn "*sse4_1_extractps"
4283
  [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
4284
        (vec_select:SF
4285
          (match_operand:V4SF 1 "register_operand" "x")
4286
          (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
4287
  "TARGET_SSE4_1"
4288
  "%vextractps\t{%2, %1, %0|%0, %1, %2}"
4289
  [(set_attr "type" "sselog")
4290
   (set_attr "prefix_data16" "1")
4291
   (set_attr "prefix_extra" "1")
4292
   (set_attr "length_immediate" "1")
4293
   (set_attr "prefix" "maybe_vex")
4294
   (set_attr "mode" "V4SF")])
4295
 
4296
;; Element extraction from a V4SF held in offsettable memory ("o").  The
;; insn has an empty condition and never prints assembly ("#"); after
;; reload it is split into an SFmode move from byte offset 4 * index,
;; where index is operand 2 (0..3).
(define_insn_and_split "*vec_extract_v4sf_mem"
4297
  [(set (match_operand:SF 0 "register_operand" "=x*rf")
4298
       (vec_select:SF
4299
         (match_operand:V4SF 1 "memory_operand" "o")
4300
         (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
4301
  ""
4302
  "#"
4303
  "reload_completed"
4304
  [(const_int 0)]
4305
{
4306
  int i = INTVAL (operands[2]);
4307
 
4308
  emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
4309
  DONE;
4310
})
4311
 
4312
;; Element-extract expander, instantiated once per mode of the SSEMODE
;; iterator.  Operand 0 receives the element (scalar mode of the vector,
;; <ssescalarmode>), operand 1 is the source vector register and operand
;; 2 a constant element index.  All code is emitted by
;; ix86_expand_vector_extract, hence DONE.
(define_expand "vec_extract<mode>"
4313
  [(match_operand:<ssescalarmode> 0 "register_operand" "")
4314
   (match_operand:SSEMODE 1 "register_operand" "")
4315
   (match_operand 2 "const_int_operand" "")]
4316
  "TARGET_SSE"
4317
{
4318
  ix86_expand_vector_extract (false, operands[0], operands[1],
4319
                              INTVAL (operands[2]));
4320
  DONE;
4321
})
4322
 
4323
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4324
;;
4325
;; Parallel double-precision floating point element swizzling
4326
;;
4327
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4328
 
4329
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; The selector picks elements 1, 5, 3, 7 of the V8DF concatenation, i.e.
;; { op1[1], op2[1], op1[3], op2[3] }.
4330
(define_insn "avx_unpckhpd256"
4331
  [(set (match_operand:V4DF 0 "register_operand" "=x")
4332
        (vec_select:V4DF
4333
          (vec_concat:V8DF
4334
            (match_operand:V4DF 1 "register_operand" "x")
4335
            (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4336
          (parallel [(const_int 1) (const_int 5)
4337
                     (const_int 3) (const_int 7)])))]
4338
  "TARGET_AVX"
4339
  "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4340
  [(set_attr "type" "sselog")
4341
   (set_attr "prefix" "vex")
4342
   (set_attr "mode" "V4DF")])
4343
 
4344
;; Expander for the high interleave of two V2DF values: elements 1 and 3
;; of their concatenation, i.e. { op1[1], op2[1] }.  If
;; ix86_vec_interleave_v2df_operator_ok rejects the operand combination,
;; operand 2 is forced into a register before the pattern is emitted.
(define_expand "vec_interleave_highv2df"
4345
  [(set (match_operand:V2DF 0 "register_operand" "")
4346
        (vec_select:V2DF
4347
          (vec_concat:V4DF
4348
            (match_operand:V2DF 1 "nonimmediate_operand" "")
4349
            (match_operand:V2DF 2 "nonimmediate_operand" ""))
4350
          (parallel [(const_int 1)
4351
                     (const_int 3)])))]
4352
  "TARGET_SSE2"
4353
{
4354
  if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4355
    operands[2] = force_reg (V2DFmode, operands[2]);
4356
})
4357
 
4358
;; AVX high interleave of two V2DF values, { op1[1], op2[1] }.
;; Alternatives, in order: both operands in registers (vunpckhpd);
;; operand 2 identical to memory operand 1 (vmovddup); operand 1 in
;; offsettable memory with operand 2 in a register (vmovlpd); memory
;; destination tied to operand 2 (vmovhpd store).  %H1 is presumably the
;; upper 8 bytes of memory operand 1 -- confirm against the 'H' operand
;; modifier in i386.c.
(define_insn "*avx_interleave_highv2df"
4359
  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,x,m")
4360
        (vec_select:V2DF
4361
          (vec_concat:V4DF
4362
            (match_operand:V2DF 1 "nonimmediate_operand" " x,o,o,x")
4363
            (match_operand:V2DF 2 "nonimmediate_operand" " x,1,x,0"))
4364
          (parallel [(const_int 1)
4365
                     (const_int 3)])))]
4366
  "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4367
  "@
4368
   vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4369
   vmovddup\t{%H1, %0|%0, %H1}
4370
   vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4371
   vmovhpd\t{%1, %0|%0, %1}"
4372
  [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4373
   (set_attr "prefix" "vex")
4374
   (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
4375
 
4376
;; SSE3 high interleave of two V2DF values, { op1[1], op2[1] }.  Same
;; four alternatives as the AVX pattern, but in two-operand form: the
;; register input that survives is tied to the destination ("0").  The
;; movddup alternative is what distinguishes this from the SSE2 pattern.
(define_insn "*sse3_interleave_highv2df"
4377
  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,x,m")
4378
        (vec_select:V2DF
4379
          (vec_concat:V4DF
4380
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,o,x")
4381
            (match_operand:V2DF 2 "nonimmediate_operand" " x,1,0,0"))
4382
          (parallel [(const_int 1)
4383
                     (const_int 3)])))]
4384
  "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4385
  "@
4386
   unpckhpd\t{%2, %0|%0, %2}
4387
   movddup\t{%H1, %0|%0, %H1}
4388
   movlpd\t{%H1, %0|%0, %H1}
4389
   movhpd\t{%1, %0|%0, %1}"
4390
  [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4391
   (set_attr "prefix_data16" "*,*,1,1")
4392
   (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
4393
 
4394
;; SSE2 high interleave of two V2DF values, { op1[1], op2[1] }.  Three
;; alternatives: register/register unpckhpd, movlpd from offsettable
;; memory operand 1 with operand 2 tied to the destination, and a movhpd
;; store to a memory destination tied to operand 2.
(define_insn "*sse2_interleave_highv2df"
4395
  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,m")
4396
        (vec_select:V2DF
4397
          (vec_concat:V4DF
4398
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
4399
            (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
4400
          (parallel [(const_int 1)
4401
                     (const_int 3)])))]
4402
  "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4403
  "@
4404
   unpckhpd\t{%2, %0|%0, %2}
4405
   movlpd\t{%H1, %0|%0, %H1}
4406
   movhpd\t{%1, %0|%0, %1}"
4407
  [(set_attr "type" "sselog,ssemov,ssemov")
4408
   (set_attr "prefix_data16" "*,1,1")
4409
   (set_attr "mode" "V2DF,V1DF,V1DF")])
4410
 
4411
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander with an empty preparation statement: it only emits the
;; vec_select below, which uses operand 1 for both halves of the
;; concatenation (match_dup 1) and selects elements 0, 4, 2, 6.
4412
(define_expand "avx_movddup256"
4413
  [(set (match_operand:V4DF 0 "register_operand" "")
4414
        (vec_select:V4DF
4415
          (vec_concat:V8DF
4416
            (match_operand:V4DF 1 "nonimmediate_operand" "")
4417
            (match_dup 1))
4418
          (parallel [(const_int 0) (const_int 4)
4419
                     (const_int 2) (const_int 6)])))]
4420
  "TARGET_AVX"
4421
  "")
4422
 
4423
;; Named expander for the 256-bit low unpack: selects elements 0, 4, 2, 6
;; of the concatenation, i.e. { op1[0], op2[0], op1[2], op2[2] }.  The
;; preparation statement is empty, so the RTL below is emitted as is.
(define_expand "avx_unpcklpd256"
4424
  [(set (match_operand:V4DF 0 "register_operand" "")
4425
        (vec_select:V4DF
4426
          (vec_concat:V8DF
4427
            (match_operand:V4DF 1 "register_operand" "")
4428
            (match_operand:V4DF 2 "nonimmediate_operand" ""))
4429
          (parallel [(const_int 0) (const_int 4)
4430
                     (const_int 2) (const_int 6)])))]
4431
  "TARGET_AVX"
4432
  "")
4433
 
4434
;; Matcher for the 256-bit low unpack / movddup RTL (elements 0, 4, 2, 6
;; of the concatenation).  Alternative 0 has operand 2 identical to
;; operand 1 (constraint "1") and emits vmovddup; alternative 1 is the
;; general vunpcklpd.  The insn condition allows operand 1 to be memory
;; only when it is rtx_equal_p to operand 2.
(define_insn "*avx_unpcklpd256"
4435
  [(set (match_operand:V4DF 0 "register_operand"         "=x,x")
4436
        (vec_select:V4DF
4437
          (vec_concat:V8DF
4438
            (match_operand:V4DF 1 "nonimmediate_operand" "xm,x")
4439
            (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm"))
4440
          (parallel [(const_int 0) (const_int 4)
4441
                     (const_int 2) (const_int 6)])))]
4442
  "TARGET_AVX
4443
   && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))"
4444
  "@
4445
   vmovddup\t{%1, %0|%0, %1}
4446
   vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4447
  [(set_attr "type" "sselog")
4448
   (set_attr "prefix" "vex")
4449
   (set_attr "mode" "V4DF")])
4450
 
4451
;; Expander for the low interleave of two V2DF values: elements 0 and 2
;; of their concatenation, i.e. { op1[0], op2[0] }.  If
;; ix86_vec_interleave_v2df_operator_ok rejects the operand combination,
;; operand 1 is forced into a register before the pattern is emitted.
(define_expand "vec_interleave_lowv2df"
4452
  [(set (match_operand:V2DF 0 "register_operand" "")
4453
        (vec_select:V2DF
4454
          (vec_concat:V4DF
4455
            (match_operand:V2DF 1 "nonimmediate_operand" "")
4456
            (match_operand:V2DF 2 "nonimmediate_operand" ""))
4457
          (parallel [(const_int 0)
4458
                     (const_int 2)])))]
4459
  "TARGET_SSE2"
4460
{
4461
  if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4462
    operands[1] = force_reg (V2DFmode, operands[1]);
4463
})
4464
 
4465
;; AVX low interleave of two V2DF values, { op1[0], op2[0] }.
;; Alternatives, in order: both operands in registers (vunpcklpd);
;; operand 2 identical to memory operand 1 (vmovddup); operand 2 in
;; memory (vmovhpd load); offsettable memory destination tied to
;; operand 1 (vmovlpd store to %H0, presumably the upper 8 bytes of the
;; destination -- confirm against the 'H' operand modifier in i386.c).
(define_insn "*avx_interleave_lowv2df"
4466
  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,x,o")
4467
        (vec_select:V2DF
4468
          (vec_concat:V4DF
4469
            (match_operand:V2DF 1 "nonimmediate_operand" " x,m,x,0")
4470
            (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4471
          (parallel [(const_int 0)
4472
                     (const_int 2)])))]
4473
  "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4474
  "@
4475
   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4476
   vmovddup\t{%1, %0|%0, %1}
4477
   vmovhpd\t{%2, %1, %0|%0, %1, %2}
4478
   vmovlpd\t{%2, %H0|%H0, %2}"
4479
  [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4480
   (set_attr "prefix" "vex")
4481
   (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
4482
 
4483
;; SSE3 low interleave of two V2DF values, { op1[0], op2[0] }.  Same four
;; alternatives as the AVX pattern, but in two-operand form with
;; operand 1 tied to the destination except in the movddup alternative,
;; where operand 1 is memory and operand 2 must be identical to it.
(define_insn "*sse3_interleave_lowv2df"
4484
  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,x,o")
4485
        (vec_select:V2DF
4486
          (vec_concat:V4DF
4487
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,m,0,0")
4488
            (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4489
          (parallel [(const_int 0)
4490
                     (const_int 2)])))]
4491
  "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4492
  "@
4493
   unpcklpd\t{%2, %0|%0, %2}
4494
   movddup\t{%1, %0|%0, %1}
4495
   movhpd\t{%2, %0|%0, %2}
4496
   movlpd\t{%2, %H0|%H0, %2}"
4497
  [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4498
   (set_attr "prefix_data16" "*,*,1,1")
4499
   (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
4500
 
4501
;; SSE2 low interleave of two V2DF values, { op1[0], op2[0] }, with
;; operand 1 always tied to the destination.  Alternatives: register
;; operand 2 (unpcklpd), memory operand 2 (movhpd load), and an
;; offsettable memory destination (movlpd store to %H0).
(define_insn "*sse2_interleave_lowv2df"
4502
  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,o")
4503
        (vec_select:V2DF
4504
          (vec_concat:V4DF
4505
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
4506
            (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4507
          (parallel [(const_int 0)
4508
                     (const_int 2)])))]
4509
  "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4510
  "@
4511
   unpcklpd\t{%2, %0|%0, %2}
4512
   movhpd\t{%2, %0|%0, %2}
4513
   movlpd\t{%2, %H0|%H0, %2}"
4514
  [(set_attr "type" "sselog,ssemov,ssemov")
4515
   (set_attr "prefix_data16" "*,1,1")
4516
   (set_attr "mode" "V2DF,V1DF,V1DF")])
4517
 
4518
;; After reload, storing { op1[0], op1[0] } (a register V2DF interleaved
;; low with itself) to memory becomes two DFmode stores of the same
;; register, viewed in DFmode, at byte offsets 0 and 8.
(define_split
4519
  [(set (match_operand:V2DF 0 "memory_operand" "")
4520
        (vec_select:V2DF
4521
          (vec_concat:V4DF
4522
            (match_operand:V2DF 1 "register_operand" "")
4523
            (match_dup 1))
4524
          (parallel [(const_int 0)
4525
                     (const_int 2)])))]
4526
  "TARGET_SSE3 && reload_completed"
4527
  [(const_int 0)]
4528
{
4529
  rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4530
  emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4531
  emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
4532
  DONE;
4533
})
4534
 
4535
;; Selecting elements (i, i + 2) from a memory V2DF concatenated with
;; itself picks the same DF element twice.  Rewrite it as a
;; vec_duplicate of the DFmode memory at byte offset i * 8, where i is
;; operand 2 (0 or 1) and the condition checks operand 3 == i + 2.
(define_split
4536
  [(set (match_operand:V2DF 0 "register_operand" "")
4537
        (vec_select:V2DF
4538
          (vec_concat:V4DF
4539
            (match_operand:V2DF 1 "memory_operand" "")
4540
            (match_dup 1))
4541
          (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4542
                     (match_operand:SI 3 "const_int_operand" "")])))]
4543
  "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4544
  [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4545
{
4546
  operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
4547
})
4548
 
4549
;; Expander taking the shuffle control as one integer (operand 3).  Each
;; of its low four bits is decoded into an explicit element index of the
;; V8DF concatenation for avx_shufpd256_1: bit 0 -> 0/1, bit 1 -> 4/5,
;; bit 2 -> 2/3, bit 3 -> 6/7.
(define_expand "avx_shufpd256"
4550
  [(match_operand:V4DF 0 "register_operand" "")
4551
   (match_operand:V4DF 1 "register_operand" "")
4552
   (match_operand:V4DF 2 "nonimmediate_operand" "")
4553
   (match_operand:SI 3 "const_int_operand" "")]
4554
  "TARGET_AVX"
4555
{
4556
  int mask = INTVAL (operands[3]);
4557
  emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4558
                                   GEN_INT (mask & 1),
4559
                                   GEN_INT (mask & 2 ? 5 : 4),
4560
                                   GEN_INT (mask & 4 ? 3 : 2),
4561
                                   GEN_INT (mask & 8 ? 7 : 6)));
4562
  DONE;
4563
})
4564
 
4565
;; 256-bit vshufpd with the selection spelled out as four element indices
;; (operands 3-6, each restricted to a two-value range by its predicate).
;; The output code folds them back into the 4-bit immediate by
;; subtracting each range base (0, 4, 2, 6) and placing the result in
;; bits 0-3, then overwrites operands[3] with it for printing.
(define_insn "avx_shufpd256_1"
4566
  [(set (match_operand:V4DF 0 "register_operand" "=x")
4567
        (vec_select:V4DF
4568
          (vec_concat:V8DF
4569
            (match_operand:V4DF 1 "register_operand" "x")
4570
            (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4571
          (parallel [(match_operand 3 "const_0_to_1_operand" "")
4572
                     (match_operand 4 "const_4_to_5_operand" "")
4573
                     (match_operand 5 "const_2_to_3_operand" "")
4574
                     (match_operand 6 "const_6_to_7_operand" "")])))]
4575
  "TARGET_AVX"
4576
{
4577
  int mask;
4578
  mask = INTVAL (operands[3]);
4579
  mask |= (INTVAL (operands[4]) - 4) << 1;
4580
  mask |= (INTVAL (operands[5]) - 2) << 2;
4581
  mask |= (INTVAL (operands[6]) - 6) << 3;
4582
  operands[3] = GEN_INT (mask);
4583
 
4584
  return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4585
}
4586
  [(set_attr "type" "sselog")
4587
   (set_attr "length_immediate" "1")
4588
   (set_attr "prefix" "vex")
4589
   (set_attr "mode" "V4DF")])
4590
 
4591
;; Expander taking the shuffle control as one integer (operand 3).  Its
;; low two bits are decoded into element indices of the V4DF
;; concatenation (bit 0 -> 0/1, bit 1 -> 2/3) and passed to the V2DF
;; instantiation of the shufpd insn.
(define_expand "sse2_shufpd"
4592
  [(match_operand:V2DF 0 "register_operand" "")
4593
   (match_operand:V2DF 1 "register_operand" "")
4594
   (match_operand:V2DF 2 "nonimmediate_operand" "")
4595
   (match_operand:SI 3 "const_int_operand" "")]
4596
  "TARGET_SSE2"
4597
{
4598
  int mask = INTVAL (operands[3]);
4599
  emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4600
                                GEN_INT (mask & 1),
4601
                                GEN_INT (mask & 2 ? 3 : 2)));
4602
  DONE;
4603
})
4604
 
4605
;; Even-element extraction expander, instantiated once per mode of the
;; SSEMODE_EO iterator.  The expander's own condition is empty.  All
;; three operands are registers of the same vector mode; the work is
;; done by ix86_expand_vec_extract_even_odd with a final argument of 0.
(define_expand "vec_extract_even<mode>"
4606
  [(match_operand:SSEMODE_EO 0 "register_operand" "")
4607
   (match_operand:SSEMODE_EO 1 "register_operand" "")
4608
   (match_operand:SSEMODE_EO 2 "register_operand" "")]
4609
  ""
4610
{
4611
  ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
4612
  DONE;
4613
})
4614
 
4615
;; Odd-element extraction expander, instantiated once per mode of the
;; SSEMODE_EO iterator.  Identical to the even variant except that
;; ix86_expand_vec_extract_even_odd is called with a final argument of 1.
(define_expand "vec_extract_odd<mode>"
4616
  [(match_operand:SSEMODE_EO 0 "register_operand" "")
4617
   (match_operand:SSEMODE_EO 1 "register_operand" "")
4618
   (match_operand:SSEMODE_EO 2 "register_operand" "")]
4619
  ""
4620
{
4621
  ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
4622
  DONE;
4623
})
4624
 
4625
;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; AVX three-operand high interleave of two V2DI values (elements 1 and
;; 3 of the concatenation, i.e. { op1[1], op2[1] }).
4626
(define_insn "*avx_interleave_highv2di"
4627
  [(set (match_operand:V2DI 0 "register_operand" "=x")
4628
        (vec_select:V2DI
4629
          (vec_concat:V4DI
4630
            (match_operand:V2DI 1 "register_operand" "x")
4631
            (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4632
          (parallel [(const_int 1)
4633
                     (const_int 3)])))]
4634
  "TARGET_AVX"
4635
  "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4636
  [(set_attr "type" "sselog")
4637
   (set_attr "prefix" "vex")
4638
   (set_attr "mode" "TI")])
4639
 
4640
;; SSE2 high interleave of two V2DI values, { op1[1], op2[1] }.  This is
;; a named pattern; operand 1 is tied to the destination ("0").
(define_insn "vec_interleave_highv2di"
4641
  [(set (match_operand:V2DI 0 "register_operand" "=x")
4642
        (vec_select:V2DI
4643
          (vec_concat:V4DI
4644
            (match_operand:V2DI 1 "register_operand" "0")
4645
            (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4646
          (parallel [(const_int 1)
4647
                     (const_int 3)])))]
4648
  "TARGET_SSE2"
4649
  "punpckhqdq\t{%2, %0|%0, %2}"
4650
  [(set_attr "type" "sselog")
4651
   (set_attr "prefix_data16" "1")
4652
   (set_attr "mode" "TI")])
4653
 
4654
;; AVX three-operand low interleave of two V2DI values (elements 0 and 2
;; of the concatenation, i.e. { op1[0], op2[0] }).
(define_insn "*avx_interleave_lowv2di"
4655
  [(set (match_operand:V2DI 0 "register_operand" "=x")
4656
        (vec_select:V2DI
4657
          (vec_concat:V4DI
4658
            (match_operand:V2DI 1 "register_operand" "x")
4659
            (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4660
          (parallel [(const_int 0)
4661
                     (const_int 2)])))]
4662
  "TARGET_AVX"
4663
  "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4664
  [(set_attr "type" "sselog")
4665
   (set_attr "prefix" "vex")
4666
   (set_attr "mode" "TI")])
4667
 
4668
;; SSE2 low interleave of two V2DI values, { op1[0], op2[0] }.  This is a
;; named pattern; operand 1 is tied to the destination ("0").
(define_insn "vec_interleave_lowv2di"
4669
  [(set (match_operand:V2DI 0 "register_operand" "=x")
4670
        (vec_select:V2DI
4671
          (vec_concat:V4DI
4672
            (match_operand:V2DI 1 "register_operand" "0")
4673
            (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4674
          (parallel [(const_int 0)
4675
                     (const_int 2)])))]
4676
  "TARGET_SSE2"
4677
  "punpcklqdq\t{%2, %0|%0, %2}"
4678
  [(set_attr "type" "sselog")
4679
   (set_attr "prefix_data16" "1")
4680
   (set_attr "mode" "TI")])
4681
 
4682
;; AVX three-operand shufpd for every mode of the SSEMODE2D iterator.
;; The concatenation has the double-size vector mode
;; (<ssedoublesizemode>).  Operands 3 and 4 are element indices of that
;; concatenation (0/1 and 2/3); the output code folds them into the
;; 2-bit immediate (bit 0 from operand 3, bit 1 from operand 4 minus 2)
;; and overwrites operands[3] with it for printing.
(define_insn "*avx_shufpd_<mode>"
4683
  [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4684
        (vec_select:SSEMODE2D
4685
          (vec_concat:<ssedoublesizemode>
4686
            (match_operand:SSEMODE2D 1 "register_operand" "x")
4687
            (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4688
          (parallel [(match_operand 3 "const_0_to_1_operand" "")
4689
                     (match_operand 4 "const_2_to_3_operand" "")])))]
4690
  "TARGET_AVX"
4691
{
4692
  int mask;
4693
  mask = INTVAL (operands[3]);
4694
  mask |= (INTVAL (operands[4]) - 2) << 1;
4695
  operands[3] = GEN_INT (mask);
4696
 
4697
  return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4698
}
4699
  [(set_attr "type" "sselog")
4700
   (set_attr "length_immediate" "1")
4701
   (set_attr "prefix" "vex")
4702
   (set_attr "mode" "V2DF")])
4703
 
4704
;; SSE2 two-operand shufpd for every mode of the SSEMODE2D iterator; the
;; V2DF instantiation is the gen_sse2_shufpd_v2df generator that the
;; sse2_shufpd expander calls.  Operand 1 is tied to the destination
;; ("0").  Operands 3 and 4 are element indices of the double-size
;; concatenation (0/1 and 2/3); the output code folds them into the
;; 2-bit immediate and overwrites operands[3] with it for printing.
(define_insn "sse2_shufpd_<mode>"
4705
  [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4706
        (vec_select:SSEMODE2D
4707
          (vec_concat:<ssedoublesizemode>
4708
            (match_operand:SSEMODE2D 1 "register_operand" "0")
4709
            (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
4710
          (parallel [(match_operand 3 "const_0_to_1_operand" "")
4711
                     (match_operand 4 "const_2_to_3_operand" "")])))]
4712
  "TARGET_SSE2"
4713
{
4714
  int mask;
4715
  mask = INTVAL (operands[3]);
4716
  mask |= (INTVAL (operands[4]) - 2) << 1;
4717
  operands[3] = GEN_INT (mask);
4718
 
4719
  return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4720
}
4721
  [(set_attr "type" "sselog")
4722
   (set_attr "length_immediate" "1")
4723
   (set_attr "mode" "V2DF")])
4724
 
4725
;; Avoid combining registers from different units in a single alternative,
4726
;; see comment above inline_secondary_memory_needed function in i386.c
;; AVX: extract element 1 (the high DF) of a V2DF.  Alternative 0 stores
;; it to memory with vmovhpd; alternative 1 moves it within SSE
;; registers using vunpckhpd of the source with itself; the three
;; alternatives with an offsettable-memory source output "#" and must
;; be split.  Memory-to-memory is rejected by the insn condition.
4727
(define_insn "*avx_storehpd"
4728
  [(set (match_operand:DF 0 "nonimmediate_operand"     "=m,x,x,*f,r")
4729
        (vec_select:DF
4730
          (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
4731
          (parallel [(const_int 1)])))]
4732
  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4733
  "@
4734
   vmovhpd\t{%1, %0|%0, %1}
4735
   vunpckhpd\t{%1, %1, %0|%0, %1, %1}
4736
   #
4737
   #
4738
   #"
4739
  [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4740
   (set_attr "prefix" "vex")
4741
   (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
4742
 
4743
;; SSE2: extract element 1 (the high DF) of a V2DF.  Alternative 0 stores
;; it to memory with movhpd; alternative 1 has the source tied to the
;; destination and uses unpckhpd of the register with itself; the three
;; alternatives with an offsettable-memory source output "#" and must
;; be split.  Memory-to-memory is rejected by the insn condition.
(define_insn "sse2_storehpd"
4744
  [(set (match_operand:DF 0 "nonimmediate_operand"     "=m,x,x,*f,r")
4745
        (vec_select:DF
4746
          (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
4747
          (parallel [(const_int 1)])))]
4748
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4749
  "@
4750
   movhpd\t{%1, %0|%0, %1}
4751
   unpckhpd\t%0, %0
4752
   #
4753
   #
4754
   #"
4755
  [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4756
   (set_attr "prefix_data16" "1,*,*,*,*")
4757
   (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
4758
 
4759
;; After reload, extracting element 1 of a V2DF in memory into a register
;; becomes an ordinary DFmode load from byte offset 8 of that memory.
(define_split
4760
  [(set (match_operand:DF 0 "register_operand" "")
4761
        (vec_select:DF
4762
          (match_operand:V2DF 1 "memory_operand" "")
4763
          (parallel [(const_int 1)])))]
4764
  "TARGET_SSE2 && reload_completed"
4765
  [(set (match_dup 0) (match_dup 1))]
4766
{
4767
  operands[1] = adjust_address (operands[1], DFmode, 8);
4768
})
4769
 
4770
;; Avoid combining registers from different units in a single alternative,
4771
;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0 (the low DF) of a V2DF.  Only the store to memory
;; prints an instruction (movlpd, written with the "%v" template
;; prefix); every register-destination alternative outputs "#" and must
;; be split.  Memory-to-memory is rejected by the insn condition.
4772
(define_insn "sse2_storelpd"
4773
  [(set (match_operand:DF 0 "nonimmediate_operand"     "=m,x,x,*f,r")
4774
        (vec_select:DF
4775
          (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4776
          (parallel [(const_int 0)])))]
4777
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4778
  "@
4779
   %vmovlpd\t{%1, %0|%0, %1}
4780
   #
4781
   #
4782
   #
4783
   #"
4784
  [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4785
   (set_attr "prefix_data16" "1,*,*,*,*")
4786
   (set_attr "prefix" "maybe_vex")
4787
   (set_attr "mode" "V1DF,DF,DF,DF,DF")])
4788
 
;; After reload, extracting element 0 of a V2DF into a register is a plain
;; DFmode move from the low part of the source.
(define_split
  [(set (match_operand:DF 0 "register_operand" "")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" "")
          (parallel [(const_int 0)])))]
  "TARGET_SSE2 && reload_completed"
  [(const_int 0)]
{
  rtx op1 = operands[1];
  /* A register source is re-expressed as the same register number in
     DFmode; any other source goes through gen_lowpart.  */
  if (REG_P (op1))
    op1 = gen_rtx_REG (DFmode, REGNO (op1));
  else
    op1 = gen_lowpart (DFmode, op1);
  emit_move_insn (operands[0], op1);
  DONE;
})
4805
 
;; Expander: operand 0 = { element 0 of operand 1, operand 2 }, i.e. replace
;; the high double of a V2DF.  The preparation statement passes the operands
;; through ix86_fixup_binary_operands before the pattern is emitted.
(define_expand "sse2_loadhpd_exp"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
        (vec_concat:V2DF
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "")
            (parallel [(const_int 0)]))
          (match_operand:DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4815
 
;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c
;;
;; AVX: operand 0 = { element 0 of operand 1, operand 2 }.  Register
;; destinations take one instruction; memory destinations (operand 1 tied
;; to operand 0) emit "#" and are split after reload.
(define_insn "*avx_loadhpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,o,o,o")
        (vec_concat:V2DF
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
            (parallel [(const_int 0)]))
          (match_operand:DF 2 "nonimmediate_operand"     " m,x,x,*f,r")))]
  "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   vmovhpd\t{%2, %1, %0|%0, %1, %2}
   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
   #
   #
   #"
  [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
4835
 
;; SSE2: operand 0 = { element 0 of operand 1, operand 2 }.
;;
;; The former register-only alternative "x <- x, 0" using
;; "shufpd $1, %1, %0" has been dropped: with operand 2 tied to operand 0
;; that instruction yields { high half of %0, low half of %1 }, which is
;; not { operand 1[0], operand 2 }, and no shufpd immediate can produce the
;; required value with those register ties.  The remaining alternatives all
;; tie operand 1 to operand 0.  Memory destinations emit "#" and are split
;; after reload.
(define_insn "sse2_loadhpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,o,o,o")
        (vec_concat:V2DF
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,0,0")
            (parallel [(const_int 0)]))
          (match_operand:DF 2 "nonimmediate_operand"     " m,x,x,*f,r")))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movhpd\t{%2, %0|%0, %2}
   unpcklpd\t{%2, %0|%0, %2}
   #
   #
   #"
  [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
   (set_attr "prefix_data16" "1,*,*,*,*")
   (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
4855
 
;; After reload, replacing element 1 of an in-memory V2DF with a register
;; value is just a DFmode store to byte offset 8 of that memory.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand" "")
        (vec_concat:V2DF
          (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
          (match_operand:DF 1 "register_operand" "")))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Element 0 is unchanged; only the second double is written.  */
  operands[0] = adjust_address (operands[0], DFmode, 8);
})
4866
 
;; Expander: operand 0 = { operand 2, element 1 of operand 1 }, i.e. replace
;; the low double of a V2DF.  The preparation statement passes the operands
;; through ix86_fixup_binary_operands before the pattern is emitted.
(define_expand "sse2_loadlpd_exp"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
        (vec_concat:V2DF
          (match_operand:DF 2 "nonimmediate_operand" "")
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "")
            (parallel [(const_int 1)]))))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
4876
 
;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c
;;
;; AVX: operand 0 = { operand 2, element 1 of operand 1 }.  Alternative 0
;; pairs a memory operand 2 with a "C"-constrained operand 1 and is emitted
;; as a plain vmovsd load.  Memory destinations (operand 1 tied to
;; operand 0) emit "#" and are split after reload.
(define_insn "*avx_loadlpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"    "=x,x,x,x,m,m,m")
        (vec_concat:V2DF
          (match_operand:DF 2 "nonimmediate_operand"    " m,m,x,x,x,*f,r")
          (vec_select:DF
            (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
            (parallel [(const_int 1)]))))]
  "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   vmovsd\t{%2, %0|%0, %2}
   vmovlpd\t{%2, %1, %0|%0, %1, %2}
   vmovsd\t{%2, %1, %0|%0, %1, %2}
   vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
   #
   #
   #"
  [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
4898
 
;; SSE2: operand 0 = { operand 2, element 1 of operand 1 }.
;;
;; Alternative 3 has operand 2 tied to operand 0 and operand 1 in a separate
;; register.  Its shufpd must therefore read operand 1 as the source so that
;; the high result element is operand 1[1]; using %2 (== %0) as the source
;; would copy the destination's own high half and ignore operand 1.
;; Memory destinations emit "#" and are split after reload.
(define_insn "sse2_loadlpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"    "=x,x,x,x,x,m,m,m")
        (vec_concat:V2DF
          (match_operand:DF 2 "nonimmediate_operand"    " m,m,x,0,0,x,*f,r")
          (vec_select:DF
            (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
            (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movsd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   movsd\t{%2, %0|%0, %2}
   shufpd\t{$2, %1, %0|%0, %1, 2}
   movhpd\t{%H1, %0|%0, %H1}
   #
   #
   #"
  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
   (set_attr "prefix_data16" "*,1,*,*,1,*,*,*")
   (set_attr "length_immediate" "*,*,*,1,*,*,*,*")
   (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
4920
 
;; After reload, replacing element 0 of an in-memory V2DF with a register
;; value is just a DFmode store to the start of that memory.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand" "")
        (vec_concat:V2DF
          (match_operand:DF 1 "register_operand" "")
          (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* The pattern keeps element 1 and replaces element 0, so the store must
     target byte offset 0.  Offset 8 would overwrite the preserved high
     element and leave the low element stale.  */
  operands[0] = adjust_address (operands[0], DFmode, 0);
})
4931
 
;; Not sure these two are ever used, but it doesn't hurt to have
;; them. -aoliva
;;
;; SSE without SSE2: extract element 1 of a V2DF using single-precision
;; move instructions.
(define_insn "*vec_extractv2df_1_sse"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
          (parallel [(const_int 1)])))]
  "!TARGET_SSE2 && TARGET_SSE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movhps\t{%1, %0|%0, %1}
   movhlps\t{%1, %0|%0, %1}
   movlps\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
4947
 
;; SSE without SSE2: extract element 0 of a V2DF using single-precision
;; move instructions.
(define_insn "*vec_extractv2df_0_sse"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
          (parallel [(const_int 0)])))]
  "!TARGET_SSE2 && TARGET_SSE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movlps\t{%1, %0|%0, %1}
   movaps\t{%1, %0|%0, %1}
   movlps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
4961
 
;; AVX: vec_merge with mask 1 -- element 0 of the result comes from
;; operand 2, element 1 from operand 1.
(define_insn "*avx_movsd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"   "=x,x,m,x,o")
        (vec_merge:V2DF
          (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
          (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
          (const_int 1)))]
  "TARGET_AVX"
  "@
   vmovsd\t{%2, %1, %0|%0, %1, %2}
   vmovlpd\t{%2, %1, %0|%0, %1, %2}
   vmovlpd\t{%2, %0|%0, %2}
   vmovhps\t{%H1, %2, %0|%0, %2, %H1}
   vmovhps\t{%1, %H0|%H0, %1}"
  [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
4978
 
;; SSE2: vec_merge with mask 1 -- element 0 of the result comes from
;; operand 2, element 1 from operand 1.
;;
;; Alternative 3 has operand 2 tied to operand 0 and operand 1 in a separate
;; register, so its shufpd must name operand 1 as the source for the high
;; element to come from operand 1; using %2 (== %0) would ignore operand 1.
(define_insn "sse2_movsd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"   "=x,x,m,x,x,o")
        (vec_merge:V2DF
          (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
          (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
          (const_int 1)))]
  "TARGET_SSE2"
  "@
   movsd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   shufpd\t{$2, %1, %0|%0, %1, 2}
   movhps\t{%H1, %0|%0, %H1}
   movhps\t{%1, %H0|%H0, %1}"
  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
   (set_attr "prefix_data16" "*,1,1,*,*,*")
   (set_attr "length_immediate" "*,*,*,1,*,*")
   (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
4997
 
;; SSE3: broadcast a DF (register or memory) into both elements of a V2DF.
(define_insn "*vec_dupv2df_sse3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_duplicate:V2DF
          (match_operand:DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE3"
  "%vmovddup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DF")])
5007
 
;; SSE2: broadcast a DF into both elements of a V2DF.  The source is tied to
;; the destination register and the duplication is done in place.
(define_insn "vec_dupv2df"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_duplicate:V2DF
          (match_operand:DF 1 "register_operand" "0")))]
  "TARGET_SSE2"
  "unpcklpd\t%0, %0"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "V2DF")])
5016
 
;; SSE3: a vec_concat of a DF with itself is a broadcast, emitted the same
;; way as the vec_duplicate form.
(define_insn "*vec_concatv2df_sse3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_concat:V2DF
          (match_operand:DF 1 "nonimmediate_operand" "xm")
          (match_dup 1)))]
  "TARGET_SSE3"
  "%vmovddup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DF")])
5027
 
;; AVX: build a V2DF as { operand 1, operand 2 }.  The last alternative
;; pairs a memory operand 1 with a "C"-constrained operand 2 and is emitted
;; as a plain vmovsd load.
(define_insn "*vec_concatv2df_avx"
  [(set (match_operand:V2DF 0 "register_operand"     "=x,x,x")
        (vec_concat:V2DF
          (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
          (match_operand:DF 2 "vector_move_operand"  " x,m,C")))]
  "TARGET_AVX"
  "@
   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
   vmovhpd\t{%2, %1, %0|%0, %1, %2}
   vmovsd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "DF,V1DF,DF")])
5041
 
;; Non-AVX: build a V2DF as { operand 1, operand 2 }.  The Y2-constrained
;; alternatives use double-precision instructions; the trailing "x"
;; alternatives use movlhps/movhps, so the pattern is enabled for plain
;; TARGET_SSE.
(define_insn "*vec_concatv2df"
  [(set (match_operand:V2DF 0 "register_operand"     "=Y2,Y2,Y2,x,x")
        (vec_concat:V2DF
          (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
          (match_operand:DF 2 "vector_move_operand"  " Y2,m ,C ,x,m")))]
  "TARGET_SSE"
  "@
   unpcklpd\t{%2, %0|%0, %2}
   movhpd\t{%2, %0|%0, %2}
   movsd\t{%1, %0|%0, %1}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
   (set_attr "prefix_data16" "*,1,*,*,*")
   (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
5057
 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel integral arithmetic
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5063
 
;; Integer vector negate for every SSEMODEI mode, expressed as 0 - operand 1.
;; Operand 2 is created here as a register holding the mode's zero vector.
;; The <mode>/<MODE> iterator attributes had been stripped from the name and
;; the C fragment; they are restored so each mode gets its own pattern.
(define_expand "neg<mode>2"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (minus:SSEMODEI
          (match_dup 2)
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
5071
 
;; Expander for integer vector add/subtract over SSEMODEI x plusminus.
;; The stripped iterator attributes are restored: the name needs
;; <plusminus_insn><mode> and the fixup call needs <CODE> and <MODE>.
;; NOTE(review): <plusminus_insn> is a code attribute defined outside this
;; chunk (restored from upstream GCC) -- confirm it is in scope.
(define_expand "<plusminus_insn><mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (plusminus:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
5079
 
;; AVX three-operand integer vector add/subtract over SSEMODEI x plusminus.
;; Restores the iterator attributes stripped from the name, the condition
;; and the mnemonic (which had degenerated to a bare "vp").
;; NOTE(review): <plusminus_insn>, <plusminus_mnemonic>, <comm> and
;; <ssevecsize> are attributes defined outside this chunk (restored from
;; upstream GCC) -- confirm they are in scope.
(define_insn "*avx_<plusminus_insn><mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (plusminus:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
5090
 
;; SSE2 two-operand integer vector add/subtract over SSEMODEI x plusminus;
;; operand 1 is tied to the destination.  Restores the iterator attributes
;; stripped from the name, the condition and the mnemonic (which had
;; degenerated to a bare "p").
;; NOTE(review): <plusminus_insn>, <plusminus_mnemonic>, <comm> and
;; <ssevecsize> are attributes defined outside this chunk (restored from
;; upstream GCC) -- confirm they are in scope.
(define_insn "*<plusminus_insn><mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (plusminus:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
5101
 
;; Expander for saturating integer vector add/subtract over
;; SSEMODE12 x sat_plusminus.  The stripped iterator attributes are
;; restored in the name and in the fixup call.
;; NOTE(review): <plusminus_insn> is a code attribute defined outside this
;; chunk (restored from upstream GCC) -- confirm it is in scope.
(define_expand "sse2_<plusminus_insn><mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "")
        (sat_plusminus:SSEMODE12
          (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
          (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
5109
 
;; AVX three-operand saturating integer vector add/subtract over
;; SSEMODE12 x sat_plusminus.  Restores the iterator attributes stripped
;; from the name, the condition and the mnemonic.
;; NOTE(review): <plusminus_insn>, <plusminus_mnemonic>, <comm> and
;; <ssevecsize> are attributes defined outside this chunk (restored from
;; upstream GCC) -- confirm they are in scope.
(define_insn "*avx_<plusminus_insn><mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
        (sat_plusminus:SSEMODE12
          (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
          (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
5120
 
;; SSE2 two-operand saturating integer vector add/subtract over
;; SSEMODE12 x sat_plusminus; operand 1 is tied to the destination.
;; Restores the iterator attributes stripped from the name, the condition
;; and the mnemonic.
;; NOTE(review): <plusminus_insn>, <plusminus_mnemonic>, <comm> and
;; <ssevecsize> are attributes defined outside this chunk (restored from
;; upstream GCC) -- confirm they are in scope.
(define_insn "*sse2_<plusminus_insn><mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
        (sat_plusminus:SSEMODE12
          (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
          (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
5131
 
;; V16QI multiply.  There is no direct instruction template ("#"); while
;; pseudos can still be created the insn is split into two V8HI multiplies
;; on byte-interleaved inputs followed by an even-byte extraction.
(define_insn_and_split "mulv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "")
        (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
                    (match_operand:V16QI 2 "register_operand" "")))]
  "TARGET_SSE2
   && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  /* t[0..3] hold the interleaved inputs, t[4..5] the word products.  */
  rtx t[6];
  int i;

  for (i = 0; i < 6; ++i)
    t[i] = gen_reg_rtx (V16QImode);

  /* Unpack data such that we've got a source byte in each low byte of
     each word.  We don't care what goes into the high byte of each word.
     Rather than trying to get zero in there, most convenient is to let
     it be a copy of the low byte.  */
  emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
  emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
  emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
  emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));

  /* Multiply words.  The end-of-line annotations here give a picture of what
     the output of that instruction looks like.  Dot means don't care; the
     letters are the bytes of the result with A being the most significant.  */
  emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
                           gen_lowpart (V8HImode, t[0]),
                           gen_lowpart (V8HImode, t[1])));
  emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
                           gen_lowpart (V8HImode, t[2]),
                           gen_lowpart (V8HImode, t[3])));

  /* Extract the even bytes and merge them back together.  */
  ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
  DONE;
})
5171
 
;; Expander for V8HI multiply (low 16 bits of each product).  Operands are
;; passed through ix86_fixup_binary_operands_no_copy before emission.
(define_expand "mulv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
                   (match_operand:V8HI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
5178
 
;; AVX three-operand V8HI multiply; operands 1 and 2 are commutative ("%").
(define_insn "*avx_mulv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                   (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "vpmullw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
5188
 
;; SSE2 two-operand V8HI multiply; operand 1 is tied to the destination and
;; commutative with operand 2 ("%").
(define_insn "*mulv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
                   (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmullw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
5198
 
;; Expander for the signed V8HI high-part multiply: sign-extend both inputs
;; to V8SI, multiply, shift right by 16 and truncate back to V8HI.
(define_expand "smulv8hi3_highpart"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (truncate:V8HI
          (lshiftrt:V8SI
            (mult:V8SI
              (sign_extend:V8SI
                (match_operand:V8HI 1 "nonimmediate_operand" ""))
              (sign_extend:V8SI
                (match_operand:V8HI 2 "nonimmediate_operand" "")))
            (const_int 16))))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
5211
 
;; AVX three-operand signed V8HI high-part multiply (vpmulhw).  Named
;; "*avx_smulv8hi3_highpart" to parallel "*avx_umulv8hi3_highpart"; the
;; leading "*" means no gen_* function is generated from the name.
(define_insn "*avx_smulv8hi3_highpart"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (mult:V8SI
              (sign_extend:V8SI
                (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
              (sign_extend:V8SI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
            (const_int 16))))]
  "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "vpmulhw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
5227
 
;; SSE2 two-operand signed V8HI high-part multiply (pmulhw); operand 1 is
;; tied to the destination.
(define_insn "*smulv8hi3_highpart"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (mult:V8SI
              (sign_extend:V8SI
                (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
              (sign_extend:V8SI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
            (const_int 16))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmulhw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
5243
 
;; Expander for the unsigned V8HI high-part multiply: zero-extend both
;; inputs to V8SI, multiply, shift right by 16 and truncate back to V8HI.
(define_expand "umulv8hi3_highpart"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (truncate:V8HI
          (lshiftrt:V8SI
            (mult:V8SI
              (zero_extend:V8SI
                (match_operand:V8HI 1 "nonimmediate_operand" ""))
              (zero_extend:V8SI
                (match_operand:V8HI 2 "nonimmediate_operand" "")))
            (const_int 16))))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
5256
 
;; AVX three-operand unsigned V8HI high-part multiply (vpmulhuw).
(define_insn "*avx_umulv8hi3_highpart"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (mult:V8SI
              (zero_extend:V8SI
                (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
              (zero_extend:V8SI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
            (const_int 16))))]
  "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
5272
 
;; SSE2 two-operand unsigned V8HI high-part multiply (pmulhuw); operand 1
;; is tied to the destination.
(define_insn "*umulv8hi3_highpart"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (mult:V8SI
              (zero_extend:V8SI
                (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
              (zero_extend:V8SI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
            (const_int 16))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmulhuw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
5288
 
;; Expander for the unsigned widening multiply of the even V4SI elements
;; (indices 0 and 2) of both inputs, producing a V2DI.
(define_expand "sse2_umulv2siv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (mult:V2DI
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "")
              (parallel [(const_int 0) (const_int 2)])))
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
5302
 
;; AVX three-operand unsigned widening multiply of V4SI elements 0 and 2
;; (vpmuludq).
(define_insn "*avx_umulv2siv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (mult:V2DI
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "%x")
              (parallel [(const_int 0) (const_int 2)])))
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
  "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
5319
 
;; SSE2 two-operand unsigned widening multiply of V4SI elements 0 and 2
;; (pmuludq); operand 1 is tied to the destination.
(define_insn "*sse2_umulv2siv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (mult:V2DI
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "%0")
              (parallel [(const_int 0) (const_int 2)])))
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
  "pmuludq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
5336
 
;; Expander for the signed widening multiply of the even V4SI elements
;; (indices 0 and 2) of both inputs, producing a V2DI.  Requires SSE4.1.
(define_expand "sse4_1_mulv2siv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (mult:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "")
              (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE4_1"
  "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
5350
 
;; AVX three-operand signed widening multiply of V4SI elements 0 and 2
;; (vpmuldq).
(define_insn "*avx_mulv2siv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (mult:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "%x")
              (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
  "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
5368
 
;; SSE4.1 two-operand signed widening multiply of V4SI elements 0 and 2
;; (pmuldq); operand 1 is tied to the destination.
(define_insn "*sse4_1_mulv2siv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (mult:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "%0")
              (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
  "pmuldq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
5385
 
;; Expander for pmaddwd: sign-extend the V8HI inputs, multiply the
;; even-indexed elements pairwise and the odd-indexed elements pairwise,
;; and add the two V4SI products.
(define_expand "sse2_pmaddwd"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (plus:V4SI
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand" "")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand" "")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)]))))
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 1)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 2)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)]))))))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
5419
 
;; AVX three-operand multiply-add of signed words (vpmaddwd): products of
;; the even-indexed and odd-indexed V8HI elements are summed into V4SI.
(define_insn "*avx_pmaddwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)]))))
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 1)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 2)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)]))))))]
  "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
5456
 
;; SSE2 two-operand multiply-add of signed words (pmaddwd); operand 1 is
;; tied to the destination.  Products of the even-indexed and odd-indexed
;; V8HI elements are summed into V4SI.
(define_insn "*sse2_pmaddwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand" "%0")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)]))))
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 1)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 2)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)]))))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmaddwd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
5494
 
;; Expander for V4SI multiply.  Only when SSE4.1 or AVX is enabled are the
;; operands passed through ix86_fixup_binary_operands_no_copy; for plain
;; SSE2 they are emitted untouched and the insn is later split by
;; "*sse2_mulv4si3".
(define_expand "mulv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
                   (match_operand:V4SI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  if (TARGET_SSE4_1 || TARGET_AVX)
    ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
})
5504
 
;; AVX three-operand V4SI multiply (vpmulld).
(define_insn "*avx_mulv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                   (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
  "vpmulld\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
5515
 
;; SSE4.1 two-operand V4SI multiply (pmulld); operand 1 is tied to the
;; destination.
(define_insn "*sse4_1_mulv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
                   (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
  "pmulld\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
5525
 
;; V4SI multiply for SSE2 without SSE4.1/AVX.  There is no direct
;; instruction template ("#"); while pseudos can still be created the insn
;; is split into two widening multiplies (elements 0/2 and 1/3) whose low
;; halves are shuffled and interleaved back into a V4SI.
(define_insn_and_split "*sse2_mulv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
                   (match_operand:V4SI 2 "register_operand" "")))]
  "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
   && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx t1, t2, t3, t4, t5, t6, thirtytwo;
  rtx op0, op1, op2;

  op0 = operands[0];
  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (V4SImode);
  t2 = gen_reg_rtx (V4SImode);
  t3 = gen_reg_rtx (V4SImode);
  t4 = gen_reg_rtx (V4SImode);
  t5 = gen_reg_rtx (V4SImode);
  t6 = gen_reg_rtx (V4SImode);
  /* Shift count passed to the whole-register right shifts below.  */
  thirtytwo = GEN_INT (32);

  /* Multiply elements 2 and 0.  */
  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
                                     op1, op2));

  /* Shift both input vectors down one element, so that elements 3
     and 1 are now in the slots for elements 2 and 0.  For K8, at
     least, this is faster than using a shuffle.  */
  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
                                 gen_lowpart (V1TImode, op1),
                                 thirtytwo));
  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
                                 gen_lowpart (V1TImode, op2),
                                 thirtytwo));
  /* Multiply elements 3 and 1.  */
  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
                                     t2, t3));

  /* Move the results in element 2 down to element 1; we don't care
     what goes in elements 2 and 3.  */
  emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
                                const0_rtx, const0_rtx));
  emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
                                const0_rtx, const0_rtx));

  /* Merge the parts back together.  */
  emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
  DONE;
})
5578
 
5579
;; V2DI multiply.  Emitted as "#" and always split ("&& 1") into 32-bit
;; multiplies, shifts and adds; the split allocates temporaries, hence
;; the can_create_pseudo_p condition.  With XOP the cross products are
;; formed with pmulld + phadddq and accumulated with pmacsdql; otherwise
;; the product is built from three 32x32->64 unsigned multiplies:
;;   lo1*lo2 + ((lo1*hi2) << 32) + ((lo2*hi1) << 32).
(define_insn_and_split "mulv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
                   (match_operand:V2DI 2 "register_operand" "")))]
  "TARGET_SSE2
   && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx t1, t2, t3, t4, t5, t6, thirtytwo;
  rtx op0, op1, op2;

  op0 = operands[0];
  op1 = operands[1];
  op2 = operands[2];

  if (TARGET_XOP)
    {
      /* op1: A,B,C,D, op2: E,F,G,H */
      op1 = gen_lowpart (V4SImode, op1);
      op2 = gen_lowpart (V4SImode, op2);

      t1 = gen_reg_rtx (V4SImode);
      t2 = gen_reg_rtx (V4SImode);
      t3 = gen_reg_rtx (V2DImode);
      t4 = gen_reg_rtx (V2DImode);

      /* t1: B,A,D,C */
      emit_insn (gen_sse2_pshufd_1 (t1, op1,
                                    GEN_INT (1),
                                    GEN_INT (0),
                                    GEN_INT (3),
                                    GEN_INT (2)));

      /* t2: (B*E),(A*F),(D*G),(C*H) */
      emit_insn (gen_mulv4si3 (t2, t1, op2));

      /* t3: (B*E)+(A*F), (D*G)+(C*H) */
      emit_insn (gen_xop_phadddq (t3, t2));

      /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
      emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));

      /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
      emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
    }
  else
    {
      t1 = gen_reg_rtx (V2DImode);
      t2 = gen_reg_rtx (V2DImode);
      t3 = gen_reg_rtx (V2DImode);
      t4 = gen_reg_rtx (V2DImode);
      t5 = gen_reg_rtx (V2DImode);
      t6 = gen_reg_rtx (V2DImode);
      thirtytwo = GEN_INT (32);

      /* Multiply low parts.  */
      emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
                                         gen_lowpart (V4SImode, op2)));

      /* Shift input vectors right 32 bits so that the high parts land
         in the low-part slots and can be multiplied.  */
      emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
      emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));

      /* Multiply high parts by low parts.  */
      emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
                                         gen_lowpart (V4SImode, t3)));
      emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
                                         gen_lowpart (V4SImode, t2)));

      /* Shift them back.  */
      emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
      emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));

      /* Add the three parts together.  */
      emit_insn (gen_addv2di3 (t6, t1, t4));
      emit_insn (gen_addv2di3 (op0, t6, t5));
    }
  DONE;
})
5660
 
5661
;; Signed widening multiply, high half: V8HI x V8HI -> V4SI.  The low
;; 16 bits of each product come from mulv8hi3 and the high 16 bits from
;; smulv8hi3_highpart; interleaving the high halves of the two result
;; vectors forms the 32-bit products, written through a V8HI view of
;; operand 0.
(define_expand "vec_widen_smult_hi_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx op1, op2, t1, t2, dest;

  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (V8HImode);
  t2 = gen_reg_rtx (V8HImode);
  dest = gen_lowpart (V8HImode, operands[0]);

  emit_insn (gen_mulv8hi3 (t1, op1, op2));
  emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
  emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
  DONE;
})
5680
 
5681
;; Signed widening multiply, low half: V8HI x V8HI -> V4SI.  Same scheme
;; as the _hi_ variant (mulv8hi3 for the low 16 bits, smulv8hi3_highpart
;; for the high 16 bits) but interleaving the low halves of the two
;; result vectors.
(define_expand "vec_widen_smult_lo_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx op1, op2, t1, t2, dest;

  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (V8HImode);
  t2 = gen_reg_rtx (V8HImode);
  dest = gen_lowpart (V8HImode, operands[0]);

  emit_insn (gen_mulv8hi3 (t1, op1, op2));
  emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
  emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
  DONE;
})
5700
 
5701
;; Unsigned widening multiply, high half: V8HI x V8HI -> V4SI.  The low
;; 16 bits of each product come from mulv8hi3 and the high 16 bits from
;; umulv8hi3_highpart; the high halves of the two result vectors are
;; interleaved into a V8HI view of operand 0.
(define_expand "vec_widen_umult_hi_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx op1, op2, t1, t2, dest;

  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (V8HImode);
  t2 = gen_reg_rtx (V8HImode);
  dest = gen_lowpart (V8HImode, operands[0]);

  emit_insn (gen_mulv8hi3 (t1, op1, op2));
  emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
  emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
  DONE;
})
5720
 
5721
;; Unsigned widening multiply, low half: V8HI x V8HI -> V4SI.  Same
;; scheme as the _hi_ variant (mulv8hi3 plus umulv8hi3_highpart) but
;; interleaving the low halves of the two result vectors.
(define_expand "vec_widen_umult_lo_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx op1, op2, t1, t2, dest;

  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (V8HImode);
  t2 = gen_reg_rtx (V8HImode);
  dest = gen_lowpart (V8HImode, operands[0]);

  emit_insn (gen_mulv8hi3 (t1, op1, op2));
  emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
  emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
  DONE;
})
5740
 
5741
;; Signed widening multiply, high half: V4SI x V4SI -> V2DI.  XOP only.
;; Both inputs are permuted with pshufd selector (0, 2, 1, 3) and the
;; permuted vectors are fed to xop_mulv2div2di3_high.
(define_expand "vec_widen_smult_hi_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_XOP"
{
  rtx t1, t2;

  t1 = gen_reg_rtx (V4SImode);
  t2 = gen_reg_rtx (V4SImode);

  emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
                                GEN_INT (0),
                                GEN_INT (2),
                                GEN_INT (1),
                                GEN_INT (3)));
  emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
                                GEN_INT (0),
                                GEN_INT (2),
                                GEN_INT (1),
                                GEN_INT (3)));
  emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
  DONE;
})
5765
 
5766
;; Signed widening multiply, low half: V4SI x V4SI -> V2DI.  XOP only.
;; Both inputs are permuted with pshufd selector (0, 2, 1, 3) and the
;; permuted vectors are fed to xop_mulv2div2di3_low.
(define_expand "vec_widen_smult_lo_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_XOP"
{
  rtx t1, t2;

  t1 = gen_reg_rtx (V4SImode);
  t2 = gen_reg_rtx (V4SImode);

  emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
                                GEN_INT (0),
                                GEN_INT (2),
                                GEN_INT (1),
                                GEN_INT (3)));
  emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
                                GEN_INT (0),
                                GEN_INT (2),
                                GEN_INT (1),
                                GEN_INT (3)));
  emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
  DONE;
})
5790
 
5791
;; Unsigned widening multiply, high half: V4SI x V4SI -> V2DI.  Each
;; input is interleaved with itself (high half), which places the two
;; high source elements in slots 0 and 2; sse2_umulv2siv2di3 then
;; multiplies those two slots into the V2DI result.
(define_expand "vec_widen_umult_hi_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx op1, op2, t1, t2;

  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (V4SImode);
  t2 = gen_reg_rtx (V4SImode);

  emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
  emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
  emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
  DONE;
})
5809
 
5810
;; Unsigned widening multiply, low half: V4SI x V4SI -> V2DI.  Each
;; input is interleaved with itself (low half), which places the two
;; low source elements in slots 0 and 2; sse2_umulv2siv2di3 then
;; multiplies those two slots into the V2DI result.
(define_expand "vec_widen_umult_lo_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx op1, op2, t1, t2;

  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (V4SImode);
  t2 = gen_reg_rtx (V4SImode);

  emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
  emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
  emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
  DONE;
})
5828
 
5829
;; Signed dot product: operand 0 = operand 3 + pmaddwd (operand 1,
;; operand 2).  The multiply-add result goes to a fresh V4SI temporary
;; and is then added to the V4SI accumulator.
(define_expand "sdot_prodv8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")
   (match_operand:V4SI 3 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx t = gen_reg_rtx (V4SImode);
  emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
  emit_insn (gen_addv4si3 (operands[0], operands[3], t));
  DONE;
})
5841
 
5842
;; Unsigned dot product: V4SI x V4SI accumulated into V2DI.  Two
;; widening multiplies are used: one on the inputs as given and one on
;; the inputs shifted right by 32 bits (one element), so that the
;; remaining pair of elements is multiplied as well.  Both products are
;; added to the accumulator in operand 3.
(define_expand "udot_prodv4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")
   (match_operand:V2DI 3 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx t1, t2, t3, t4;

  /* First pair of products, plus the incoming accumulator.  */
  t1 = gen_reg_rtx (V2DImode);
  emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
  emit_insn (gen_addv2di3 (t1, t1, operands[3]));

  /* Shift both inputs down by one 32-bit element (whole-register
     shift through a V1TI view).  */
  t2 = gen_reg_rtx (V4SImode);
  t3 = gen_reg_rtx (V4SImode);
  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
                                 gen_lowpart (V1TImode, operands[1]),
                                 GEN_INT (32)));
  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
                                 gen_lowpart (V1TImode, operands[2]),
                                 GEN_INT (32)));

  /* Second pair of products.  */
  t4 = gen_reg_rtx (V2DImode);
  emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));

  emit_insn (gen_addv2di3 (operands[0], t1, t4));
  DONE;
})
5870
 
5871
;; AVX arithmetic right shift for the SSEMODE24 modes (three-operand
;; form).  The shift count is a register or a constant; an immediate
;; byte is only counted in the length when the count is a const_int.
;; <mode> and <ssevecsize> are substituted per iterator mode.
(define_insn "*avx_ashr<mode>3"
  [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
        (ashiftrt:SSEMODE24
          (match_operand:SSEMODE24 1 "register_operand" "x")
          (match_operand:SI 2 "nonmemory_operand" "xN")))]
  "TARGET_AVX"
  "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix" "vex")
   (set (attr "length_immediate")
     (if_then_else (match_operand 2 "const_int_operand" "")
       (const_string "1")
       (const_string "0")))
   (set_attr "mode" "TI")])
5885
 
5886
;; SSE2 arithmetic right shift for the SSEMODE24 modes (two-operand
;; form, operand 1 tied to the destination).  An immediate byte is only
;; counted in the length when the count is a const_int.
;; <mode> and <ssevecsize> are substituted per iterator mode.
(define_insn "ashr<mode>3"
  [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
        (ashiftrt:SSEMODE24
          (match_operand:SSEMODE24 1 "register_operand" "0")
          (match_operand:SI 2 "nonmemory_operand" "xN")))]
  "TARGET_SSE2"
  "psra<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix_data16" "1")
   (set (attr "length_immediate")
     (if_then_else (match_operand 2 "const_int_operand" "")
       (const_string "1")
       (const_string "0")))
   (set_attr "mode" "TI")])
5900
 
5901
;; AVX whole-register logical right shift (V1TI).  The RTL shift count
;; is in bits and must be a multiple of 8; the output code divides it
;; by 8 because vpsrldq takes its count in bytes.
(define_insn "*avx_lshrv1ti3"
  [(set (match_operand:V1TI 0 "register_operand" "=x")
        (lshiftrt:V1TI
         (match_operand:V1TI 1 "register_operand" "x")
         (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
  "TARGET_AVX"
{
  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
  return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sseishft")
   (set_attr "prefix" "vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
5915
 
5916
;; AVX logical right shift for the SSEMODE248 modes (three-operand
;; form).  An immediate byte is only counted in the length when the
;; count is a const_int.  <mode> and <ssevecsize> are substituted per
;; iterator mode.
(define_insn "*avx_lshr<mode>3"
  [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
        (lshiftrt:SSEMODE248
          (match_operand:SSEMODE248 1 "register_operand" "x")
          (match_operand:SI 2 "nonmemory_operand" "xN")))]
  "TARGET_AVX"
  "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix" "vex")
   (set (attr "length_immediate")
     (if_then_else (match_operand 2 "const_int_operand" "")
       (const_string "1")
       (const_string "0")))
   (set_attr "mode" "TI")])
5930
 
5931
;; SSE2 whole-register logical right shift (V1TI), operand 1 tied to
;; the destination.  The RTL shift count is in bits and must be a
;; multiple of 8; the output code divides it by 8 because psrldq takes
;; its count in bytes.
(define_insn "sse2_lshrv1ti3"
  [(set (match_operand:V1TI 0 "register_operand" "=x")
        (lshiftrt:V1TI
         (match_operand:V1TI 1 "register_operand" "0")
         (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
  "TARGET_SSE2"
{
  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
  return "psrldq\t{%2, %0|%0, %2}";
}
  [(set_attr "type" "sseishft")
   (set_attr "prefix_data16" "1")
   (set_attr "length_immediate" "1")
   (set_attr "atom_unit" "sishuf")
   (set_attr "mode" "TI")])
5946
 
5947
;; SSE2 logical right shift for the SSEMODE248 modes (two-operand form,
;; operand 1 tied to the destination).  An immediate byte is only
;; counted in the length when the count is a const_int.
;; <mode> and <ssevecsize> are substituted per iterator mode.
(define_insn "lshr<mode>3"
  [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
        (lshiftrt:SSEMODE248
          (match_operand:SSEMODE248 1 "register_operand" "0")
          (match_operand:SI 2 "nonmemory_operand" "xN")))]
  "TARGET_SSE2"
  "psrl<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix_data16" "1")
   (set (attr "length_immediate")
     (if_then_else (match_operand 2 "const_int_operand" "")
       (const_string "1")
       (const_string "0")))
   (set_attr "mode" "TI")])
5961
 
5962
;; AVX whole-register left shift (V1TI).  The RTL shift count is in
;; bits and must be a multiple of 8; the output code divides it by 8
;; because vpslldq takes its count in bytes.
(define_insn "*avx_ashlv1ti3"
  [(set (match_operand:V1TI 0 "register_operand" "=x")
        (ashift:V1TI (match_operand:V1TI 1 "register_operand" "x")
                     (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
  "TARGET_AVX"
{
  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
  return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sseishft")
   (set_attr "prefix" "vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
5975
 
5976
;; AVX left shift for the SSEMODE248 modes (three-operand form).  An
;; immediate byte is only counted in the length when the count is a
;; const_int.  <mode> and <ssevecsize> are substituted per iterator
;; mode.
(define_insn "*avx_ashl<mode>3"
  [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
        (ashift:SSEMODE248
          (match_operand:SSEMODE248 1 "register_operand" "x")
          (match_operand:SI 2 "nonmemory_operand" "xN")))]
  "TARGET_AVX"
  "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix" "vex")
   (set (attr "length_immediate")
     (if_then_else (match_operand 2 "const_int_operand" "")
       (const_string "1")
       (const_string "0")))
   (set_attr "mode" "TI")])
5990
 
5991
;; SSE2 whole-register left shift (V1TI), operand 1 tied to the
;; destination.  The RTL shift count is in bits and must be a multiple
;; of 8; the output code divides it by 8 because pslldq takes its count
;; in bytes.
(define_insn "sse2_ashlv1ti3"
  [(set (match_operand:V1TI 0 "register_operand" "=x")
        (ashift:V1TI (match_operand:V1TI 1 "register_operand" "0")
                     (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
  "TARGET_SSE2"
{
  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
  return "pslldq\t{%2, %0|%0, %2}";
}
  [(set_attr "type" "sseishft")
   (set_attr "prefix_data16" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
6004
 
6005
;; SSE2 left shift for the SSEMODE248 modes (two-operand form, operand 1
;; tied to the destination).  An immediate byte is only counted in the
;; length when the count is a const_int.
;; <mode> and <ssevecsize> are substituted per iterator mode.
(define_insn "ashl<mode>3"
  [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
        (ashift:SSEMODE248
          (match_operand:SSEMODE248 1 "register_operand" "0")
          (match_operand:SI 2 "nonmemory_operand" "xN")))]
  "TARGET_SSE2"
  "psll<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix_data16" "1")
   (set (attr "length_immediate")
     (if_then_else (match_operand 2 "const_int_operand" "")
       (const_string "1")
       (const_string "0")))
   (set_attr "mode" "TI")])
6019
 
6020
;; Whole-vector left shift for every SSEMODEI mode.  Operands 0 and 1
;; are re-viewed as V1TI so that the emitted SET is a V1TI ashift; the
;; count (operand 2) is in bits and must be a multiple of 8.
(define_expand "vec_shl_<mode>"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (ashift:V1TI
         (match_operand:SSEMODEI 1 "register_operand" "")
         (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
  "TARGET_SSE2"
{
  operands[0] = gen_lowpart (V1TImode, operands[0]);
  operands[1] = gen_lowpart (V1TImode, operands[1]);
})
6030
 
6031
;; Whole-vector logical right shift for every SSEMODEI mode.  Operands 0
;; and 1 are re-viewed as V1TI so that the emitted SET is a V1TI
;; lshiftrt; the count (operand 2) is in bits and must be a multiple
;; of 8.
(define_expand "vec_shr_<mode>"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (lshiftrt:V1TI
         (match_operand:SSEMODEI 1 "register_operand" "")
         (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
  "TARGET_SSE2"
{
  operands[0] = gen_lowpart (V1TImode, operands[0]);
  operands[1] = gen_lowpart (V1TImode, operands[1]);
})
6041
 
6042
;; AVX integer max/min for every maxmin code and SSEMODE124 mode
;; (three-operand form, operand 1 commutative).  prefix_extra is 1
;; unless the mode is V8HI for signed max/min or V16QI for unsigned
;; max/min.  <code>/<CODE>, <mode>/<MODE>, <maxminiprefix> and
;; <ssevecsize> are substituted per iterator value.
(define_insn "*avx_<code><mode>3"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (maxmin:SSEMODE124
          (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
          (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "vp<maxminiprefix><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set (attr "prefix_extra")
     (if_then_else
       (ne (symbol_ref "<MODE>mode != ((<CODE> == SMAX || <CODE> == SMIN) ? V8HImode : V16QImode)")
           (const_int 0))
       (const_string "1")
       (const_string "0")))
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
6058
 
6059
;; Expander for unsigned max/min on V16QI (one expander per umaxmin
;; code).  It only fixes up the operands so the matching insn pattern
;; can accept them.
(define_expand "<code>v16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "")
        (umaxmin:V16QI
          (match_operand:V16QI 1 "nonimmediate_operand" "")
          (match_operand:V16QI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
6066
 
6067
;; SSE2 unsigned max/min on V16QI (two-operand form, operand 1 tied to
;; the destination and commutative).  <maxminiprefix> supplies the
;; max/min part of the mnemonic for each umaxmin code.
(define_insn "*<code>v16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (umaxmin:V16QI
          (match_operand:V16QI 1 "nonimmediate_operand" "%0")
          (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
  "p<maxminiprefix>b\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
6077
 
6078
;; Expander for signed max/min on V8HI (one expander per smaxmin code).
;; It only fixes up the operands so the matching insn pattern can
;; accept them.
(define_expand "<code>v8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (smaxmin:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "")
          (match_operand:V8HI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
6085
 
6086
;; SSE2 signed max/min on V8HI (two-operand form, operand 1 tied to the
;; destination and commutative).  <maxminiprefix> supplies the max/min
;; part of the mnemonic for each smaxmin code.
(define_insn "*<code>v8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (smaxmin:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "%0")
          (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
  "p<maxminiprefix>w\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
6096
 
6097
;; Unsigned max on V8HI.  With SSE4.1 the operands are only fixed up
;; and the SET is emitted for an insn pattern to match.  Otherwise the
;; result is computed as ussubv8hi3 (op1, op2) + op2.
(define_expand "umaxv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
                   (match_operand:V8HI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  if (TARGET_SSE4_1)
    ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
  else
    {
      rtx op0 = operands[0], op2 = operands[2], op3 = op0;
      /* The subtraction result must not overwrite op2, which is still
         needed by the addition; use a fresh register if the
         destination is op2 itself.  */
      if (rtx_equal_p (op3, op2))
        op3 = gen_reg_rtx (V8HImode);
      emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
      emit_insn (gen_addv8hi3 (op0, op3, op2));
      DONE;
    }
})
6115
 
6116
;; Signed max for the SSEMODE14 modes.  With SSE4.1 the operands are
;; only fixed up and the SET is emitted for an insn pattern to match.
;; Otherwise the max is expanded as a vector conditional:
;;   op0 = (op1 > op2) ? op1 : op2.
(define_expand "smax<mode>3"
  [(set (match_operand:SSEMODE14 0 "register_operand" "")
        (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
                        (match_operand:SSEMODE14 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  if (TARGET_SSE4_1)
    ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
  else
  {
    rtx xops[6];
    bool ok;

    /* xops uses the operand layout of the vcond expander: 0 is the
       destination, 1/2 the values chosen when the comparison in 3 is
       true/false, 4/5 the compared operands.  */
    xops[0] = operands[0];
    xops[1] = operands[1];
    xops[2] = operands[2];
    xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
    xops[4] = operands[1];
    xops[5] = operands[2];
    ok = ix86_expand_int_vcond (xops);
    gcc_assert (ok);
    DONE;
  }
})
6140
 
6141
;; SSE4.1 signed max/min for the SSEMODE14 modes (two-operand form,
;; operand 1 tied to the destination and commutative).
;; <code>/<CODE>, <mode>/<MODE>, <maxminiprefix> and <ssevecsize> are
;; substituted per iterator value.
(define_insn "*sse4_1_<code><mode>3"
  [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
        (smaxmin:SSEMODE14
          (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6151
 
6152
;; Signed max on V2DI, available with SSE4.2.  Always expanded as a
;; vector conditional: op0 = (op1 > op2) ? op1 : op2.
(define_expand "smaxv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (smax:V2DI (match_operand:V2DI 1 "register_operand" "")
                   (match_operand:V2DI 2 "register_operand" "")))]
  "TARGET_SSE4_2"
{
  rtx xops[6];
  bool ok;

  /* xops uses the operand layout of the vcond expander: 0 is the
     destination, 1/2 the values chosen when the comparison in 3 is
     true/false, 4/5 the compared operands.  */
  xops[0] = operands[0];
  xops[1] = operands[1];
  xops[2] = operands[2];
  xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
  xops[4] = operands[1];
  xops[5] = operands[2];
  ok = ix86_expand_int_vcond (xops);
  gcc_assert (ok);
  DONE;
})
6171
 
6172
;; Unsigned max on V4SI.  With SSE4.1 the operands are only fixed up
;; and the SET is emitted for an insn pattern to match.  Otherwise the
;; max is expanded as a vector conditional using an unsigned compare:
;;   op0 = (op1 >u op2) ? op1 : op2.
(define_expand "umaxv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
                   (match_operand:V4SI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  if (TARGET_SSE4_1)
    ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
  else
  {
    rtx xops[6];
    bool ok;

    /* xops uses the operand layout of the vcond expander: 0 is the
       destination, 1/2 the values chosen when the comparison in 3 is
       true/false, 4/5 the compared operands.  */
    xops[0] = operands[0];
    xops[1] = operands[1];
    xops[2] = operands[2];
    xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
    xops[4] = operands[1];
    xops[5] = operands[2];
    ok = ix86_expand_int_vcond (xops);
    gcc_assert (ok);
    DONE;
  }
})
6196
 
6197
;; SSE4.1 unsigned max/min for the SSEMODE24 modes (two-operand form,
;; operand 1 tied to the destination and commutative).
;; <code>/<CODE>, <mode>/<MODE>, <maxminiprefix> and <ssevecsize> are
;; substituted per iterator value.
(define_insn "*sse4_1_<code><mode>3"
  [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
        (umaxmin:SSEMODE24
          (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6207
 
6208
;; Unsigned max on V2DI, available with SSE4.2.  Always expanded as a
;; vector conditional: op0 = (op1 >u op2) ? op1 : op2.
(define_expand "umaxv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (umax:V2DI (match_operand:V2DI 1 "register_operand" "")
                   (match_operand:V2DI 2 "register_operand" "")))]
  "TARGET_SSE4_2"
{
  rtx xops[6];
  bool ok;

  /* xops uses the operand layout of the vcond expander: 0 is the
     destination, 1/2 the values chosen when the comparison in 3 is
     true/false, 4/5 the compared operands.  */
  xops[0] = operands[0];
  xops[1] = operands[1];
  xops[2] = operands[2];
  xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
  xops[4] = operands[1];
  xops[5] = operands[2];
  ok = ix86_expand_int_vcond (xops);
  gcc_assert (ok);
  DONE;
})
6227
 
6228
;; Signed min for the SSEMODE14 modes.  With SSE4.1 the operands are
;; only fixed up and the SET is emitted for an insn pattern to match.
;; Otherwise the min is expanded as a vector conditional with the
;; selected values swapped relative to smax:
;;   op0 = (op1 > op2) ? op2 : op1.
(define_expand "smin<mode>3"
  [(set (match_operand:SSEMODE14 0 "register_operand" "")
        (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
                        (match_operand:SSEMODE14 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  if (TARGET_SSE4_1)
    ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
  else
    {
      rtx xops[6];
      bool ok;

      /* xops uses the operand layout of the vcond expander: 0 is the
         destination, 1/2 the values chosen when the comparison in 3 is
         true/false, 4/5 the compared operands.  */
      xops[0] = operands[0];
      xops[1] = operands[2];
      xops[2] = operands[1];
      xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
      xops[4] = operands[1];
      xops[5] = operands[2];
      ok = ix86_expand_int_vcond (xops);
      gcc_assert (ok);
      DONE;
    }
})
6252
 
6253
;; Signed min on V2DI, available with SSE4.2.  Always expanded as a
;; vector conditional: op0 = (op1 > op2) ? op2 : op1.
(define_expand "sminv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (smin:V2DI (match_operand:V2DI 1 "register_operand" "")
                   (match_operand:V2DI 2 "register_operand" "")))]
  "TARGET_SSE4_2"
{
  rtx xops[6];
  bool ok;

  /* xops uses the operand layout of the vcond expander: 0 is the
     destination, 1/2 the values chosen when the comparison in 3 is
     true/false, 4/5 the compared operands.  */
  xops[0] = operands[0];
  xops[1] = operands[2];
  xops[2] = operands[1];
  xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
  xops[4] = operands[1];
  xops[5] = operands[2];
  ok = ix86_expand_int_vcond (xops);
  gcc_assert (ok);
  DONE;
})
6272
 
6273
;; Unsigned min for the SSEMODE24 modes.  With SSE4.1 the operands are
;; only fixed up and the SET is emitted for an insn pattern to match.
;; Otherwise the min is expanded as a vector conditional using an
;; unsigned compare: op0 = (op1 >u op2) ? op2 : op1.
(define_expand "umin<mode>3"
  [(set (match_operand:SSEMODE24 0 "register_operand" "")
        (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
                        (match_operand:SSEMODE24 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  if (TARGET_SSE4_1)
    ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
  else
    {
      rtx xops[6];
      bool ok;

      /* xops uses the operand layout of the vcond expander: 0 is the
         destination, 1/2 the values chosen when the comparison in 3 is
         true/false, 4/5 the compared operands.  */
      xops[0] = operands[0];
      xops[1] = operands[2];
      xops[2] = operands[1];
      xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
      xops[4] = operands[1];
      xops[5] = operands[2];
      ok = ix86_expand_int_vcond (xops);
      gcc_assert (ok);
      DONE;
    }
})
6297
 
6298
;; Unsigned min on V2DI, available with SSE4.2.  Always expanded as a
;; vector conditional: op0 = (op1 >u op2) ? op2 : op1.
(define_expand "uminv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (umin:V2DI (match_operand:V2DI 1 "register_operand" "")
                   (match_operand:V2DI 2 "register_operand" "")))]
  "TARGET_SSE4_2"
{
  rtx xops[6];
  bool ok;

  /* xops uses the operand layout of the vcond expander: 0 is the
     destination, 1/2 the values chosen when the comparison in 3 is
     true/false, 4/5 the compared operands.  */
  xops[0] = operands[0];
  xops[1] = operands[2];
  xops[2] = operands[1];
  xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
  xops[4] = operands[1];
  xops[5] = operands[2];
  ok = ix86_expand_int_vcond (xops);
  gcc_assert (ok);
  DONE;
})
6317
 
6318
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel integral comparisons
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6323
 
6324
;; Expander for element-wise equality on the SSEMODE124 modes (SSE2,
;; not used when XOP is enabled).  It only fixes up the operands so the
;; matching insn pattern can accept them.
(define_expand "sse2_eq<mode>3"
  [(set (match_operand:SSEMODE124 0 "register_operand" "")
        (eq:SSEMODE124
          (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
          (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2 && !TARGET_XOP"
  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
6331
 
6332
;; AVX element-wise equality for the SSEMODE1248 modes (three-operand
;; form, operand 1 commutative).  prefix_extra is 1 only for the V2DI
;; variant.  <mode>/<MODE> and <ssevecsize> are substituted per
;; iterator mode.
(define_insn "*avx_eq<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
        (eq:SSEMODE1248
          (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
          (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
  "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecmp")
   (set (attr "prefix_extra")
     (if_then_else (match_operand:V2DI 0 "" "")
       (const_string "1")
       (const_string "*")))
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
6346
 
6347
;; SSE2 element-wise equality for the SSEMODE124 modes (two-operand
;; form, operand 1 tied to the destination and commutative).  Disabled
;; when XOP is enabled.  <mode>/<MODE> and <ssevecsize> are substituted
;; per iterator mode.
(define_insn "*sse2_eq<mode>3"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (eq:SSEMODE124
          (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && !TARGET_XOP
   && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
  "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
6358
 
6359
;; Expander for element-wise equality on V2DI (SSE4.1).  It only fixes
;; up the operands so the matching insn pattern can accept them.
(define_expand "sse4_1_eqv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (eq:V2DI
          (match_operand:V2DI 1 "nonimmediate_operand" "")
          (match_operand:V2DI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE4_1"
  "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
6366
 
6367
;; SSE4.1 element-wise equality on V2DI: two-operand pcmpeqq, operand 1
;; tied to the destination and commutative.
(define_insn "*sse4_1_eqv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (eq:V2DI
          (match_operand:V2DI 1 "nonimmediate_operand" "%0")
          (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
  "pcmpeqq\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6377
 
6378
;; AVX element-wise signed greater-than for the SSEMODE1248 modes
;; (three-operand form; not commutative, so operand 1 must be a
;; register).  prefix_extra is 1 only for the V2DI variant.
;; <mode> and <ssevecsize> are substituted per iterator mode.
(define_insn "*avx_gt<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
        (gt:SSEMODE1248
          (match_operand:SSEMODE1248 1 "register_operand" "x")
          (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecmp")
   (set (attr "prefix_extra")
     (if_then_else (match_operand:V2DI 0 "" "")
       (const_string "1")
       (const_string "*")))
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
6392
 
6393
;; SSE2 element-wise signed greater-than for the SSEMODE124 modes
;; (two-operand form, operand 1 tied to the destination).  Disabled
;; when XOP is enabled.  <mode> and <ssevecsize> are substituted per
;; iterator mode.
(define_insn "sse2_gt<mode>3"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (gt:SSEMODE124
          (match_operand:SSEMODE124 1 "register_operand" "0")
          (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && !TARGET_XOP"
  "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
6403
 
6404
;; V2DI signed greater-than compare; emits pcmpgtq under TARGET_SSE4_2.
;; Operand 1 is tied to the destination; operand 2 is register or memory.
(define_insn "sse4_2_gtv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (gt:V2DI
          (match_operand:V2DI 1 "register_operand" "0")
          (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE4_2"
  "pcmpgtq\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
6414
 
6415
;; Vector conditional select for the integer modes in SSEMODE124C8:
;;   operand 0 = (operand 4 <op3> operand 5) ? operand 1 : operand 2
;; All code generation is delegated to ix86_expand_int_vcond; the
;; expander asserts that it succeeded and emits nothing itself (DONE).
(define_expand "vcond<mode>"
  [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
        (if_then_else:SSEMODE124C8
          (match_operator 3 ""
            [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
             (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
          (match_operand:SSEMODE124C8 1 "general_operand" "")
          (match_operand:SSEMODE124C8 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  bool ok = ix86_expand_int_vcond (operands);
  gcc_assert (ok);
  DONE;
})
6429
 
6430
;; Second vector conditional select expander for SSEMODE124C8, with the
;; same operand layout as vcond<mode>.  The "u" suffix presumably marks
;; the unsigned-comparison variant; the body is identical and relies on
;; ix86_expand_int_vcond to inspect the comparison code in operand 3.
(define_expand "vcondu<mode>"
  [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
        (if_then_else:SSEMODE124C8
          (match_operator 3 ""
            [(match_operand:SSEMODE124C8 4 "nonimmediate_operand" "")
             (match_operand:SSEMODE124C8 5 "nonimmediate_operand" "")])
          (match_operand:SSEMODE124C8 1 "general_operand" "")
          (match_operand:SSEMODE124C8 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  bool ok = ix86_expand_int_vcond (operands);
  gcc_assert (ok);
  DONE;
})
6444
 
6445
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel bitwise logical operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6450
 
6451
;; One's complement of an integer vector, expressed as XOR with an
;; all-ones vector.  The preparation code builds a CONST_VECTOR whose
;; GET_MODE_NUNITS elements are all constm1_rtx, forces it into a
;; register, and stores it in operands[2] for the (match_dup 2).
(define_expand "one_cmpl<mode>2"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
                      (match_dup 2)))]
  "TARGET_SSE2"
{
  int i, n = GET_MODE_NUNITS (<MODE>mode);
  rtvec v = rtvec_alloc (n);

  for (i = 0; i < n; ++i)
    RTVEC_ELT (v, i) = constm1_rtx;

  operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
})
6465
 
6466
;; 256-bit integer and-not: operand 0 = ~operand 1 & operand 2.
;; The integer modes in AVX256MODEI are handled with the
;; single-precision form vandnps; the "mode" attribute is taken from
;; <avxvecpsmode> accordingly.
(define_insn "*avx_andnot<mode>3"
  [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
        (and:AVX256MODEI
          (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
          (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vandnps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<avxvecpsmode>")])
6476
 
6477
;; 128-bit integer and-not for SSE without SSE2: only andnps is
;; available, so it is used on the integer vector (mode V4SF).
;; Operand 1 (the complemented input) is tied to the destination.
(define_insn "*sse_andnot<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (and:SSEMODEI
          (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "(TARGET_SSE && !TARGET_SSE2)"
  "andnps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])
6486
 
6487
;; 128-bit integer and-not, AVX three-operand form (vpandn):
;; operand 0 = ~operand 1 & operand 2, destination not tied to a source.
(define_insn "*avx_andnot<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (and:SSEMODEI
          (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vpandn\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
6497
 
6498
;; 128-bit integer and-not, SSE2 two-operand form (pandn).
;; Operand 1 (the complemented input) is tied to the destination.
(define_insn "sse2_andnot<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (and:SSEMODEI
          (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "pandn\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
6508
 
6509
;; And-not on a TFmode value held in an SSE register, done with the
;; integer instruction pandn (insn mode TI).
;; Operand 1 (the complemented input) is tied to the destination.
(define_insn "*andnottf3"
  [(set (match_operand:TF 0 "register_operand" "=x")
        (and:TF
          (not:TF (match_operand:TF 1 "register_operand" "0"))
          (match_operand:TF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "pandn\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
6519
 
6520
;; Expander for the bitwise logical operations in the any_logic code
;; iterator, over every 128-bit integer vector mode in SSEMODEI.
;; <code>/<CODE> and <mode>/<MODE> are substituted per iteration.  The
;; preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
(define_expand "<code><mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (any_logic:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
6527
 
6528
;; 256-bit integer bitwise logic (any_logic) under AVX.  The
;; single-precision form v<logicprefix>ps is used for the integer modes
;; in AVX256MODEI; the "mode" attribute comes from <avxvecpsmode>.
;; Operands 1 and 2 are commutative ("%").
(define_insn "*avx_<code><mode>3"
  [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
        (any_logic:AVX256MODEI
          (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
          (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "v<logicprefix>ps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<avxvecpsmode>")])
6539
 
6540
;; 128-bit integer bitwise logic for SSE without SSE2: uses the
;; single-precision form <logicprefix>ps on the integer vector (mode V4SF).
;; Operand 1 is tied to the destination and commutative with operand 2.
(define_insn "*sse_<code><mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (any_logic:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "(TARGET_SSE && !TARGET_SSE2)
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "<logicprefix>ps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])
6550
 
6551
;; 128-bit integer bitwise logic, AVX three-operand form
;; (vp<logicprefix>).  Operands 1 and 2 are commutative ("%"); the
;; destination is not tied to either source.
(define_insn "*avx_<code><mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (any_logic:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "vp<logicprefix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
6562
 
6563
;; 128-bit integer bitwise logic, SSE2 two-operand form (p<logicprefix>).
;; Operand 1 is tied to the destination and commutative with operand 2.
(define_insn "*sse2_<code><mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (any_logic:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "p<logicprefix>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
6573
 
6574
;; Expander for the any_logic operations on TFmode values under
;; TARGET_SSE2.  The preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
(define_expand "<code>tf3"
  [(set (match_operand:TF 0 "register_operand" "")
        (any_logic:TF
          (match_operand:TF 1 "nonimmediate_operand" "")
          (match_operand:TF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
6581
 
6582
;; Bitwise logic on a TFmode value held in an SSE register, done with
;; the integer instruction p<logicprefix> (insn mode TI).
;; Operand 1 is tied to the destination and commutative with operand 2.
(define_insn "*<code>tf3"
  [(set (match_operand:TF 0 "register_operand" "=x")
        (any_logic:TF
          (match_operand:TF 1 "nonimmediate_operand" "%0")
          (match_operand:TF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
  "p<logicprefix>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
6592
 
6593
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel integral element swizzling
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6598
 
6599
;; Truncating pack of two V8HI vectors into one V16QI vector.
;; Both inputs are reinterpreted as V16QI (gen_lowpart) and handed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0
;; (presumably "even elements", i.e. the low byte of each halfword --
;; confirm against that helper).  Nothing else is emitted (DONE).
(define_expand "vec_pack_trunc_v8hi"
  [(match_operand:V16QI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand:V8HI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx op1 = gen_lowpart (V16QImode, operands[1]);
  rtx op2 = gen_lowpart (V16QImode, operands[2]);
  ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
  DONE;
})
6610
 
6611
;; Truncating pack of two V4SI vectors into one V8HI vector.
;; Both inputs are reinterpreted as V8HI (gen_lowpart) and handed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0
;; (presumably "even elements", i.e. the low halfword of each dword --
;; confirm against that helper).  Nothing else is emitted (DONE).
(define_expand "vec_pack_trunc_v4si"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx op1 = gen_lowpart (V8HImode, operands[1]);
  rtx op2 = gen_lowpart (V8HImode, operands[2]);
  ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
  DONE;
})
6622
 
6623
;; Truncating pack of two V2DI vectors into one V4SI vector.
;; Both inputs are reinterpreted as V4SI (gen_lowpart) and handed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0
;; (presumably "even elements", i.e. the low dword of each qword --
;; confirm against that helper).  Nothing else is emitted (DONE).
(define_expand "vec_pack_trunc_v2di"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V2DI 1 "register_operand" "")
   (match_operand:V2DI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx op1 = gen_lowpart (V4SImode, operands[1]);
  rtx op2 = gen_lowpart (V4SImode, operands[2]);
  ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
  DONE;
})
6634
 
6635
;; AVX pack with signed saturation, halfword -> byte (vpacksswb).
;; Result = concat (ss_truncate (operand 1), ss_truncate (operand 2));
;; operand 1 supplies the low eight bytes.
(define_insn "*avx_packsswb"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_concat:V16QI
          (ss_truncate:V8QI
            (match_operand:V8HI 1 "register_operand" "x"))
          (ss_truncate:V8QI
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
  "TARGET_AVX"
  "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
6647
 
6648
;; SSE2 pack with signed saturation, halfword -> byte (packsswb).
;; Result = concat (ss_truncate (operand 1), ss_truncate (operand 2));
;; operand 1 is tied to the destination.
(define_insn "sse2_packsswb"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_concat:V16QI
          (ss_truncate:V8QI
            (match_operand:V8HI 1 "register_operand" "0"))
          (ss_truncate:V8QI
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
  "TARGET_SSE2"
  "packsswb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
6660
 
6661
;; AVX pack with signed saturation, dword -> halfword (vpackssdw).
;; Result = concat (ss_truncate (operand 1), ss_truncate (operand 2));
;; operand 1 supplies the low four halfwords.
(define_insn "*avx_packssdw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          (ss_truncate:V4HI
            (match_operand:V4SI 1 "register_operand" "x"))
          (ss_truncate:V4HI
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
  "TARGET_AVX"
  "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
6673
 
6674
;; SSE2 pack with signed saturation, dword -> halfword (packssdw).
;; Result = concat (ss_truncate (operand 1), ss_truncate (operand 2));
;; operand 1 is tied to the destination.
(define_insn "sse2_packssdw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          (ss_truncate:V4HI
            (match_operand:V4SI 1 "register_operand" "0"))
          (ss_truncate:V4HI
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
  "TARGET_SSE2"
  "packssdw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
6686
 
6687
;; AVX pack with unsigned saturation, halfword -> byte (vpackuswb).
;; Result = concat (us_truncate (operand 1), us_truncate (operand 2));
;; operand 1 supplies the low eight bytes.
(define_insn "*avx_packuswb"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_concat:V16QI
          (us_truncate:V8QI
            (match_operand:V8HI 1 "register_operand" "x"))
          (us_truncate:V8QI
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
  "TARGET_AVX"
  "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
6699
 
6700
;; SSE2 pack with unsigned saturation, halfword -> byte (packuswb).
;; Result = concat (us_truncate (operand 1), us_truncate (operand 2));
;; operand 1 is tied to the destination.
(define_insn "sse2_packuswb"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_concat:V16QI
          (us_truncate:V8QI
            (match_operand:V8HI 1 "register_operand" "0"))
          (us_truncate:V8QI
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
  "TARGET_SSE2"
  "packuswb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
6712
 
6713
;; AVX interleave of the high eight bytes of two V16QI vectors
;; (vpunpckhbw).  In the V32QI concatenation, indices 8..15 come from
;; operand 1 and 24..31 from operand 2; the selection alternates them.
(define_insn "*avx_interleave_highv16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_select:V16QI
          (vec_concat:V32QI
            (match_operand:V16QI 1 "register_operand" "x")
            (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 8)  (const_int 24)
                     (const_int 9)  (const_int 25)
                     (const_int 10) (const_int 26)
                     (const_int 11) (const_int 27)
                     (const_int 12) (const_int 28)
                     (const_int 13) (const_int 29)
                     (const_int 14) (const_int 30)
                     (const_int 15) (const_int 31)])))]
  "TARGET_AVX"
  "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
6732
 
6733
;; SSE2 interleave of the high eight bytes of two V16QI vectors
;; (punpckhbw).  In the V32QI concatenation, indices 8..15 come from
;; operand 1 (tied to the destination) and 24..31 from operand 2.
(define_insn "vec_interleave_highv16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_select:V16QI
          (vec_concat:V32QI
            (match_operand:V16QI 1 "register_operand" "0")
            (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 8)  (const_int 24)
                     (const_int 9)  (const_int 25)
                     (const_int 10) (const_int 26)
                     (const_int 11) (const_int 27)
                     (const_int 12) (const_int 28)
                     (const_int 13) (const_int 29)
                     (const_int 14) (const_int 30)
                     (const_int 15) (const_int 31)])))]
  "TARGET_SSE2"
  "punpckhbw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
6752
 
6753
;; AVX interleave of the low eight bytes of two V16QI vectors
;; (vpunpcklbw).  In the V32QI concatenation, indices 0..7 come from
;; operand 1 and 16..23 from operand 2; the selection alternates them.
(define_insn "*avx_interleave_lowv16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_select:V16QI
          (vec_concat:V32QI
            (match_operand:V16QI 1 "register_operand" "x")
            (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 16)
                     (const_int 1) (const_int 17)
                     (const_int 2) (const_int 18)
                     (const_int 3) (const_int 19)
                     (const_int 4) (const_int 20)
                     (const_int 5) (const_int 21)
                     (const_int 6) (const_int 22)
                     (const_int 7) (const_int 23)])))]
  "TARGET_AVX"
  "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
6772
 
6773
;; SSE2 interleave of the low eight bytes of two V16QI vectors
;; (punpcklbw).  In the V32QI concatenation, indices 0..7 come from
;; operand 1 (tied to the destination) and 16..23 from operand 2.
(define_insn "vec_interleave_lowv16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_select:V16QI
          (vec_concat:V32QI
            (match_operand:V16QI 1 "register_operand" "0")
            (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 16)
                     (const_int 1) (const_int 17)
                     (const_int 2) (const_int 18)
                     (const_int 3) (const_int 19)
                     (const_int 4) (const_int 20)
                     (const_int 5) (const_int 21)
                     (const_int 6) (const_int 22)
                     (const_int 7) (const_int 23)])))]
  "TARGET_SSE2"
  "punpcklbw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
6792
 
6793
;; AVX interleave of the high four halfwords of two V8HI vectors
;; (vpunpckhwd).  In the V16HI concatenation, indices 4..7 come from
;; operand 1 and 12..15 from operand 2.
(define_insn "*avx_interleave_highv8hi"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (vec_concat:V16HI
            (match_operand:V8HI 1 "register_operand" "x")
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 4) (const_int 12)
                     (const_int 5) (const_int 13)
                     (const_int 6) (const_int 14)
                     (const_int 7) (const_int 15)])))]
  "TARGET_AVX"
  "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
6808
 
6809
;; SSE2 interleave of the high four halfwords of two V8HI vectors
;; (punpckhwd).  In the V16HI concatenation, indices 4..7 come from
;; operand 1 (tied to the destination) and 12..15 from operand 2.
(define_insn "vec_interleave_highv8hi"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (vec_concat:V16HI
            (match_operand:V8HI 1 "register_operand" "0")
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 4) (const_int 12)
                     (const_int 5) (const_int 13)
                     (const_int 6) (const_int 14)
                     (const_int 7) (const_int 15)])))]
  "TARGET_SSE2"
  "punpckhwd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
6824
 
6825
;; AVX interleave of the low four halfwords of two V8HI vectors
;; (vpunpcklwd).  In the V16HI concatenation, indices 0..3 come from
;; operand 1 and 8..11 from operand 2.
(define_insn "*avx_interleave_lowv8hi"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (vec_concat:V16HI
            (match_operand:V8HI 1 "register_operand" "x")
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 8)
                     (const_int 1) (const_int 9)
                     (const_int 2) (const_int 10)
                     (const_int 3) (const_int 11)])))]
  "TARGET_AVX"
  "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
6840
 
6841
;; SSE2 interleave of the low four halfwords of two V8HI vectors
;; (punpcklwd).  In the V16HI concatenation, indices 0..3 come from
;; operand 1 (tied to the destination) and 8..11 from operand 2.
(define_insn "vec_interleave_lowv8hi"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (vec_concat:V16HI
            (match_operand:V8HI 1 "register_operand" "0")
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 8)
                     (const_int 1) (const_int 9)
                     (const_int 2) (const_int 10)
                     (const_int 3) (const_int 11)])))]
  "TARGET_SSE2"
  "punpcklwd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
6856
 
6857
;; AVX interleave of the high two dwords of two V4SI vectors
;; (vpunpckhdq).  In the V8SI concatenation, indices 2..3 come from
;; operand 1 and 6..7 from operand 2.
(define_insn "*avx_interleave_highv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_select:V4SI
          (vec_concat:V8SI
            (match_operand:V4SI 1 "register_operand" "x")
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 2) (const_int 6)
                     (const_int 3) (const_int 7)])))]
  "TARGET_AVX"
  "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
6870
 
6871
;; SSE2 interleave of the high two dwords of two V4SI vectors
;; (punpckhdq).  In the V8SI concatenation, indices 2..3 come from
;; operand 1 (tied to the destination) and 6..7 from operand 2.
(define_insn "vec_interleave_highv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_select:V4SI
          (vec_concat:V8SI
            (match_operand:V4SI 1 "register_operand" "0")
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 2) (const_int 6)
                     (const_int 3) (const_int 7)])))]
  "TARGET_SSE2"
  "punpckhdq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
6884
 
6885
;; AVX interleave of the low two dwords of two V4SI vectors
;; (vpunpckldq).  In the V8SI concatenation, indices 0..1 come from
;; operand 1 and 4..5 from operand 2.
(define_insn "*avx_interleave_lowv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_select:V4SI
          (vec_concat:V8SI
            (match_operand:V4SI 1 "register_operand" "x")
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 1) (const_int 5)])))]
  "TARGET_AVX"
  "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
6898
 
6899
;; SSE2 interleave of the low two dwords of two V4SI vectors
;; (punpckldq).  In the V8SI concatenation, indices 0..1 come from
;; operand 1 (tied to the destination) and 4..5 from operand 2.
(define_insn "vec_interleave_lowv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_select:V4SI
          (vec_concat:V8SI
            (match_operand:V4SI 1 "register_operand" "0")
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 1) (const_int 5)])))]
  "TARGET_SSE2"
  "punpckldq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
6912
 
6913
;; AVX element insert for the modes in SSEMODE124 (vpinsr<ssevecsize>).
;; Operand 2 is the scalar (mode <avxscalarmode>), operand 1 the vector
;; being updated.  In the RTL, operand 3 is a one-hot vec_merge mask
;; (a power of two, upper bound <pinsrbits>); the output code turns it
;; into an element index with exact_log2 before printing.  A register
;; source is printed with %k2; a memory source is printed as-is.
;; prefix_extra is "0" when operand 0 is V8HI and "1" otherwise.
(define_insn "*avx_pinsr<ssevecsize>"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (vec_merge:SSEMODE124
          (vec_duplicate:SSEMODE124
            (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
          (match_operand:SSEMODE124 1 "register_operand" "x")
          (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
  "TARGET_AVX"
{
  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
  if (MEM_P (operands[2]))
    return "vpinsr<ssevecsize>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
  else
    return "vpinsr<ssevecsize>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
}
  [(set_attr "type" "sselog")
   (set (attr "prefix_extra")
     (if_then_else (match_operand:V8HI 0 "register_operand" "")
       (const_string "0")
       (const_string "1")))
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
6936
 
6937
;; SSE4.1 byte insert (pinsrb).  Operand 3 is a one-hot vec_merge mask
;; in the RTL (a power of two up to 32768); the output code converts it
;; to an element index with exact_log2.  A register source is printed
;; with %k2; a memory source is printed as-is.
(define_insn "*sse4_1_pinsrb"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_merge:V16QI
          (vec_duplicate:V16QI
            (match_operand:QI 2 "nonimmediate_operand" "rm"))
          (match_operand:V16QI 1 "register_operand" "0")
          (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
  "TARGET_SSE4_1"
{
  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
  if (MEM_P (operands[2]))
    return "pinsrb\t{%3, %2, %0|%0, %2, %3}";
  else
    return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
6956
 
6957
;; SSE2 halfword insert (pinsrw).  Operand 3 is a one-hot vec_merge
;; mask in the RTL (a power of two up to 128); the output code converts
;; it to an element index with exact_log2.  A register source is
;; printed with %k2; a memory source is printed as-is.
(define_insn "*sse2_pinsrw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_merge:V8HI
          (vec_duplicate:V8HI
            (match_operand:HI 2 "nonimmediate_operand" "rm"))
          (match_operand:V8HI 1 "register_operand" "0")
          (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
  "TARGET_SSE2"
{
  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
  if (MEM_P (operands[2]))
    return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
  else
    return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
6976
 
6977
;; It must come before sse2_loadld since it is preferred.
;; SSE4.1 dword insert (pinsrd).  Operand 3 is a one-hot vec_merge mask
;; in the RTL (1, 2, 4 or 8); the output code converts it to an element
;; index with exact_log2.
(define_insn "*sse4_1_pinsrd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_merge:V4SI
          (vec_duplicate:V4SI
            (match_operand:SI 2 "nonimmediate_operand" "rm"))
          (match_operand:V4SI 1 "register_operand" "0")
          (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
  "TARGET_SSE4_1"
{
  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
  return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
6994
 
6995
;; AVX qword insert (vpinsrq), 64-bit targets only.  Operand 3 is a
;; one-hot vec_merge mask in the RTL (1 or 2); the output code converts
;; it to an element index with exact_log2.
(define_insn "*avx_pinsrq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (vec_merge:V2DI
          (vec_duplicate:V2DI
            (match_operand:DI 2 "nonimmediate_operand" "rm"))
          (match_operand:V2DI 1 "register_operand" "x")
          (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
  "TARGET_AVX && TARGET_64BIT"
{
  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
  return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
7012
 
7013
;; SSE4.1 qword insert (pinsrq), 64-bit targets only.  Operand 3 is a
;; one-hot vec_merge mask in the RTL (1 or 2); the output code converts
;; it to an element index with exact_log2.  Operand 1 is tied to the
;; destination.
(define_insn "*sse4_1_pinsrq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (vec_merge:V2DI
          (vec_duplicate:V2DI
            (match_operand:DI 2 "nonimmediate_operand" "rm"))
          (match_operand:V2DI 1 "register_operand" "0")
          (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
  "TARGET_SSE4_1 && TARGET_64BIT"
{
  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
  return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_rex" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
7030
 
7031
;; Extract byte element operand 2 (0..15) of a V16QI register into a
;; general register, zero-extended to SImode.  The "%v" in the template
;; pairs with prefix "maybe_vex", so one pattern serves both encodings.
(define_insn "*sse4_1_pextrb"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (zero_extend:SI
          (vec_select:QI
            (match_operand:V16QI 1 "register_operand" "x")
            (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
  "TARGET_SSE4_1"
  "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
7044
 
7045
;; Extract byte element operand 2 (0..15) of a V16QI register straight
;; to a QImode memory location; no zero extension in this form.
(define_insn "*sse4_1_pextrb_memory"
  [(set (match_operand:QI 0 "memory_operand" "=m")
        (vec_select:QI
          (match_operand:V16QI 1 "register_operand" "x")
          (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
  "TARGET_SSE4_1"
  "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
7057
 
7058
;; Extract halfword element operand 2 (0..7) of a V8HI register into a
;; general register, zero-extended to SImode.  Available from SSE2.
(define_insn "*sse2_pextrw"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (zero_extend:SI
          (vec_select:HI
            (match_operand:V8HI 1 "register_operand" "x")
            (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
  "TARGET_SSE2"
  "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
7071
 
7072
;; Extract halfword element operand 2 (0..7) of a V8HI register
;; straight to an HImode memory location.  This store form is gated on
;; TARGET_SSE4_1, unlike the register form above it, which needs only
;; SSE2.
(define_insn "*sse4_1_pextrw_memory"
  [(set (match_operand:HI 0 "memory_operand" "=m")
        (vec_select:HI
          (match_operand:V8HI 1 "register_operand" "x")
          (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
  "TARGET_SSE4_1"
  "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
7084
 
7085
;; Extract dword element operand 2 (0..3) of a V4SI register into a
;; general register or memory ("rm").
(define_insn "*sse4_1_pextrd"
  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
        (vec_select:SI
          (match_operand:V4SI 1 "register_operand" "x")
          (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
  "TARGET_SSE4_1"
  "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
7097
 
7098
;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; Extract qword element operand 2 (0..1) of a V2DI register into a
;; general register or memory ("rm"); 64-bit targets only.
(define_insn "*sse4_1_pextrq"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
        (vec_select:DI
          (match_operand:V2DI 1 "register_operand" "x")
          (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
  "TARGET_SSE4_1 && TARGET_64BIT"
  "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_rex" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
7112
 
7113
;; Expander taking the shuffle control as one 8-bit immediate
;; (operand 2).  It splits the immediate into four 2-bit element
;; selectors, lowest bits first, and emits sse2_pshufd_1, which carries
;; them as separate operands of a vec_select.
(define_expand "sse2_pshufd"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V4SI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int mask = INTVAL (operands[2]);
  emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
                                GEN_INT ((mask >> 0) & 3),
                                GEN_INT ((mask >> 2) & 3),
                                GEN_INT ((mask >> 4) & 3),
                                GEN_INT ((mask >> 6) & 3)));
  DONE;
})
7127
 
7128
;; pshufd as a vec_select with four independent selectors (operands
;; 2..5, each 0..3).  The output code packs them back into the 8-bit
;; immediate, two bits per selector with operand 2 lowest, and stores
;; the result in operands[2] for the template.
(define_insn "sse2_pshufd_1"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")])))]
  "TARGET_SSE2"
{
  int mask = 0;
  mask |= INTVAL (operands[2]) << 0;
  mask |= INTVAL (operands[3]) << 2;
  mask |= INTVAL (operands[4]) << 4;
  mask |= INTVAL (operands[5]) << 6;
  operands[2] = GEN_INT (mask);

  return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
7152
 
7153
;; Expander taking the shuffle control as one 8-bit immediate
;; (operand 2).  It splits the immediate into four 2-bit selectors,
;; lowest bits first, and emits sse2_pshuflw_1.
(define_expand "sse2_pshuflw"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V8HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int mask = INTVAL (operands[2]);
  emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
                                 GEN_INT ((mask >> 0) & 3),
                                 GEN_INT ((mask >> 2) & 3),
                                 GEN_INT ((mask >> 4) & 3),
                                 GEN_INT ((mask >> 6) & 3)));
  DONE;
})
7167
 
7168
;; pshuflw as a vec_select: the low four halfwords are chosen by
;; operands 2..5 (each 0..3), and the high four (indices 4..7) pass
;; through unchanged.  The output code packs the selectors back into
;; the 8-bit immediate, two bits each with operand 2 lowest.
(define_insn "sse2_pshuflw_1"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")
                     (const_int 4)
                     (const_int 5)
                     (const_int 6)
                     (const_int 7)])))]
  "TARGET_SSE2"
{
  int mask = 0;
  mask |= INTVAL (operands[2]) << 0;
  mask |= INTVAL (operands[3]) << 2;
  mask |= INTVAL (operands[4]) << 4;
  mask |= INTVAL (operands[5]) << 6;
  operands[2] = GEN_INT (mask);

  return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
7197
 
7198
;; Expander taking the shuffle control as one integer in operand 2.
;; Each 2-bit field of the value has 4 added to it before being passed to
;; sse2_pshufhw_1, so the four selectors handed over lie in 4..7.
(define_expand "sse2_pshufhw"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V8HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int mask = INTVAL (operands[2]);
  emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
                                 GEN_INT (((mask >> 0) & 3) + 4),
                                 GEN_INT (((mask >> 2) & 3) + 4),
                                 GEN_INT (((mask >> 4) & 3) + 4),
                                 GEN_INT (((mask >> 6) & 3) + 4)));
  DONE;
})
7212
 
7213
;; V8HI vec_select that keeps elements 0-3 in place and takes the last
;; four selectors from operands 2-5 (each must satisfy
;; const_4_to_7_operand).  The output code subtracts 4 from every selector,
;; packs the results two bits each into one immediate, and overwrites
;; operands[2] with it so that %2 prints the packed value.
(define_insn "sse2_pshufhw_1"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "xm")
          (parallel [(const_int 0)
                     (const_int 1)
                     (const_int 2)
                     (const_int 3)
                     (match_operand 2 "const_4_to_7_operand" "")
                     (match_operand 3 "const_4_to_7_operand" "")
                     (match_operand 4 "const_4_to_7_operand" "")
                     (match_operand 5 "const_4_to_7_operand" "")])))]
  "TARGET_SSE2"
{
  int mask = 0;
  mask |= (INTVAL (operands[2]) - 4) << 0;
  mask |= (INTVAL (operands[3]) - 4) << 2;
  mask |= (INTVAL (operands[4]) - 4) << 4;
  mask |= (INTVAL (operands[5]) - 4) << 6;
  operands[2] = GEN_INT (mask);

  return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix" "maybe_vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
7242
 
7243
;; Expander producing a vec_merge (mask 1) of the SI value in operand 1,
;; duplicated across a V4SI, with operand 2.  The preparation statement
;; sets operand 2 to CONST0_RTX (V4SImode), so every element not selected
;; by the mask comes from the zero vector.
(define_expand "sse2_loadd"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (vec_merge:V4SI
          (vec_duplicate:V4SI
            (match_operand:SI 1 "nonimmediate_operand" ""))
          (match_dup 2)
          (const_int 1)))]
  "TARGET_SSE"
  "operands[2] = CONST0_RTX (V4SImode);")
7252
 
7253
;; AVX pattern for the vec_merge (mask 1) of a duplicated SI value
;; (operand 2) with V4SI operand 1.
;;   alt 0: scalar in memory, operand 1 matches "C"        -> vmovd
;;   alt 1: scalar in a general register, operand 1 "C"    -> vmovd
;;   alt 2: scalar and operand 1 both in SSE registers     -> 3-operand vmovss
(define_insn "*avx_loadld"
  [(set (match_operand:V4SI 0 "register_operand"       "=x,Yi,x")
        (vec_merge:V4SI
          (vec_duplicate:V4SI
            (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
          (match_operand:V4SI 1 "reg_or_0_operand"     "C ,C ,x")
          (const_int 1)))]
  "TARGET_AVX"
  "@
   vmovd\t{%2, %0|%0, %2}
   vmovd\t{%2, %0|%0, %2}
   vmovss\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI,TI,V4SF")])
7268
 
7269
;; Non-AVX pattern for the vec_merge (mask 1) of a duplicated SI value
;; (operand 2) with V4SI operand 1.
;;   alt 0: scalar in memory, operand 1 matches "C"           -> movd
;;   alt 1: scalar in a general register, operand 1 "C"       -> movd
;;   alt 2: scalar in memory, operand 1 "C"                   -> movss
;;   alt 3: scalar in an SSE register, operand 1 tied to the
;;          destination ("0")                                 -> movss
(define_insn "sse2_loadld"
  [(set (match_operand:V4SI 0 "register_operand"       "=Y2,Yi,x,x")
        (vec_merge:V4SI
          (vec_duplicate:V4SI
            (match_operand:SI 2 "nonimmediate_operand" "m  ,r ,m,x"))
          (match_operand:V4SI 1 "reg_or_0_operand"     "C  ,C ,C,0")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   movd\t{%2, %0|%0, %2}
   movd\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "TI,TI,V4SF,SF")])
7284
 
7285
;; Extract element 0 of a V4SI register into an SI destination.
;; The insn template is "#", so it is never output directly; it must be
;; split.  The split fires after reload when TARGET_INTER_UNIT_MOVES holds,
;; or the destination is memory, or the destination is not a general
;; register.  It becomes a plain SImode move whose source is the SImode
;; view of the same hard register number as operand 1.
(define_insn_and_split "sse2_stored"
  [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
        (vec_select:SI
          (match_operand:V4SI 1 "register_operand" "x,Yi")
          (parallel [(const_int 0)])))]
  "TARGET_SSE"
  "#"
  "&& reload_completed
   && (TARGET_INTER_UNIT_MOVES
       || MEM_P (operands [0])
       || !GENERAL_REGNO_P (true_regnum (operands [0])))"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
})
7300
 
7301
;; Extract element operand 2 of a V4SI held in offsettable memory ("o")
;; into a general register.  Never output directly ("#"); after reload it
;; is split into an SImode move from the same memory at byte offset i*4.
(define_insn_and_split "*vec_ext_v4si_mem"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (vec_select:SI
          (match_operand:V4SI 1 "memory_operand" "o")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
{
  int i = INTVAL (operands[2]);

  emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
  DONE;
})
7316
 
7317
;; Expander with an empty preparation statement: it just emits the RTL
;; that selects element 0 of a V2DI register into a DI destination.
(define_expand "sse_storeq"
  [(set (match_operand:DI 0 "nonimmediate_operand" "")
        (vec_select:DI
          (match_operand:V2DI 1 "register_operand" "")
          (parallel [(const_int 0)])))]
  "TARGET_SSE"
  "")
7324
 
7325
;; 64-bit pattern selecting element 0 of a V2DI into a DI destination.
;; The condition rejects the memory-to-memory case.
;;   alt 0, 1: output "#", i.e. these must be split before final output
;;   alt 2:    source in offsettable memory, destination a general
;;             register -> a direct mov{q} load
(define_insn "*sse2_storeq_rex64"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
        (vec_select:DI
          (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
          (parallel [(const_int 0)])))]
  "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   #
   #
   %vmov{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "*,*,imov")
   (set_attr "prefix" "*,*,maybe_vex")
   (set_attr "mode" "*,*,DI")])
7338
 
7339
;; Selects element 0 of a V2DI SSE register into a DI memory or SSE
;; register destination.  The template is "#", so the insn is never
;; output directly and must be split.
(define_insn "*sse2_storeq"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
        (vec_select:DI
          (match_operand:V2DI 1 "register_operand" "x")
          (parallel [(const_int 0)])))]
  "TARGET_SSE"
  "#")
7346
 
7347
;; Splitter for "element 0 of a V2DI register -> DI destination".
;; It applies after reload when TARGET_INTER_UNIT_MOVES holds, or the
;; destination is memory, or the destination is not a general register.
;; The result is a plain DImode move whose source is the DImode view of
;; the same hard register number as operand 1.
(define_split
  [(set (match_operand:DI 0 "nonimmediate_operand" "")
        (vec_select:DI
          (match_operand:V2DI 1 "register_operand" "")
          (parallel [(const_int 0)])))]
  "TARGET_SSE
   && reload_completed
   && (TARGET_INTER_UNIT_MOVES
       || MEM_P (operands [0])
       || !GENERAL_REGNO_P (true_regnum (operands [0])))"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
})
7361
 
7362
;; 64-bit AVX pattern extracting element 1 of a V2DI into a DI
;; destination; memory-to-memory is rejected by the condition.
;;   alt 0: register -> memory            vmovhps
;;   alt 1: register -> SSE register      vpsrldq by 8 bytes
;;   alt 2: memory   -> SSE register      vmovq from %H1
;;   alt 3: memory   -> general register  vmov{q} from %H1
;; NOTE(review): %H1 presumably prints the memory operand offset by 8
;; bytes -- confirm against the i386 print_operand code.
(define_insn "*vec_extractv2di_1_rex64_avx"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
        (vec_select:DI
          (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
          (parallel [(const_int 1)])))]
  "TARGET_64BIT
   && TARGET_AVX
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   vmovhps\t{%1, %0|%0, %1}
   vpsrldq\t{$8, %1, %0|%0, %1, 8}
   vmovq\t{%H1, %0|%0, %H1}
   vmov{q}\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov,sseishft1,ssemov,imov")
   (set_attr "length_immediate" "*,1,*,*")
   (set_attr "memory" "*,none,*,*")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V2SF,TI,TI,DI")])
7380
 
7381
;; 64-bit non-AVX pattern extracting element 1 of a V2DI into a DI
;; destination; memory-to-memory is rejected by the condition.
;;   alt 0: register -> memory            movhps
;;   alt 1: source tied to destination    psrldq by 8 bytes, in place
;;   alt 2: memory   -> SSE register      movq from %H1
;;   alt 3: memory   -> general register  mov{q} from %H1
;; NOTE(review): %H1 presumably prints the memory operand offset by 8
;; bytes -- confirm against the i386 print_operand code.
(define_insn "*vec_extractv2di_1_rex64"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
        (vec_select:DI
          (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
          (parallel [(const_int 1)])))]
  "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movhps\t{%1, %0|%0, %1}
   psrldq\t{$8, %0|%0, 8}
   movq\t{%H1, %0|%0, %H1}
   mov{q}\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov,sseishft1,ssemov,imov")
   (set_attr "length_immediate" "*,1,*,*")
   (set_attr "memory" "*,none,*,*")
   (set_attr "mode" "V2SF,TI,TI,DI")])
7396
 
7397
;; 32-bit AVX pattern extracting element 1 of a V2DI into a DI
;; destination; memory-to-memory is rejected by the condition.
;;   alt 0: register -> memory        vmovhps
;;   alt 1: register -> SSE register  vpsrldq by 8 bytes
;;   alt 2: memory   -> SSE register  vmovq from %H1
;; NOTE(review): %H1 presumably prints the memory operand offset by 8
;; bytes -- confirm against the i386 print_operand code.
(define_insn "*vec_extractv2di_1_avx"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:DI
          (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
          (parallel [(const_int 1)])))]
  "!TARGET_64BIT
   && TARGET_AVX
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   vmovhps\t{%1, %0|%0, %1}
   vpsrldq\t{$8, %1, %0|%0, %1, 8}
   vmovq\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov,sseishft1,ssemov")
   (set_attr "length_immediate" "*,1,*")
   (set_attr "memory" "*,none,*")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V2SF,TI,TI")])
7414
 
7415
;; 32-bit SSE2 pattern extracting element 1 of a V2DI into a DI
;; destination; memory-to-memory is rejected by the condition.
;;   alt 0: register -> memory          movhps
;;   alt 1: source tied to destination  psrldq by 8 bytes, in place
;;   alt 2: memory   -> SSE register    movq from %H1
;; NOTE(review): %H1 presumably prints the memory operand offset by 8
;; bytes -- confirm against the i386 print_operand code.
(define_insn "*vec_extractv2di_1_sse2"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:DI
          (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
          (parallel [(const_int 1)])))]
  "!TARGET_64BIT
   && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movhps\t{%1, %0|%0, %1}
   psrldq\t{$8, %0|%0, 8}
   movq\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov,sseishft1,ssemov")
   (set_attr "length_immediate" "*,1,*")
   (set_attr "memory" "*,none,*")
   (set_attr "mode" "V2SF,TI,TI")])
7430
 
7431
;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; SSE-without-SSE2 pattern extracting element 1 of a V2DI into a DI
;; destination; memory-to-memory is rejected by the condition.
;;   alt 0: register -> memory        movhps
;;   alt 1: register -> SSE register  movhlps
;;   alt 2: memory   -> SSE register  movlps from %H1
(define_insn "*vec_extractv2di_1_sse"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:DI
          (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
          (parallel [(const_int 1)])))]
  "!TARGET_SSE2 && TARGET_SSE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movhps\t{%1, %0|%0, %1}
   movhlps\t{%1, %0|%0, %1}
   movlps\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
7445
 
7446
;; AVX broadcast of an SI value to all four elements of a V4SI.
;;   alt 0: value in an SSE register -> vpshufd with immediate 0
;;   alt 1: value in memory          -> vbroadcastss
(define_insn "*vec_dupv4si_avx"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
        (vec_duplicate:V4SI
          (match_operand:SI 1 "register_operand" "x,m")))]
  "TARGET_AVX"
  "@
   vpshufd\t{$0, %1, %0|%0, %1, 0}
   vbroadcastss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1,ssemov")
   (set_attr "length_immediate" "1,0")
   (set_attr "prefix_extra" "0,1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI,V4SF")])
7459
 
7460
;; Broadcast of an SI register value to all four elements of a V4SI.
;;   alt 0: "Y2" registers                       -> pshufd with immediate 0
;;   alt 1: source tied to the destination ("0") -> shufps of the
;;          destination with itself, immediate 0
(define_insn "*vec_dupv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
        (vec_duplicate:V4SI
          (match_operand:SI 1 "register_operand" " Y2,0")))]
  "TARGET_SSE"
  "@
   %vpshufd\t{$0, %1, %0|%0, %1, 0}
   shufps\t{$0, %0, %0|%0, %0, 0}"
  [(set_attr "type" "sselog1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI,V4SF")])
7471
 
7472
;; AVX broadcast of a DI value to both elements of a V2DI.
;;   alt 0: value in an SSE register -> vpunpcklqdq of the source with itself
;;   alt 1: value in memory          -> vmovddup
(define_insn "*vec_dupv2di_avx"
  [(set (match_operand:V2DI 0 "register_operand"     "=x,x")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "nonimmediate_operand" " x,m")))]
  "TARGET_AVX"
  "@
   vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}
   vmovddup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI,DF")])
7483
 
7484
;; SSE3 broadcast of a DI value to both elements of a V2DI.
;;   alt 0: source tied to the destination ("0") -> punpcklqdq of the
;;          destination with itself
;;   alt 1: value in memory                      -> movddup
(define_insn "*vec_dupv2di_sse3"
  [(set (match_operand:V2DI 0 "register_operand"     "=x,x")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "nonimmediate_operand" " 0,m")))]
  "TARGET_SSE3"
  "@
   punpcklqdq\t%0, %0
   movddup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI,DF")])
7494
 
7495
;; Broadcast of a DI register value to both elements of a V2DI; in both
;; alternatives the source is tied to the destination ("0").
;;   alt 0: "Y2" destination -> punpcklqdq of the destination with itself
;;   alt 1: "x" destination  -> movlhps of the destination with itself
(define_insn "*vec_dupv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "register_operand" " 0 ,0")))]
  "TARGET_SSE"
  "@
   punpcklqdq\t%0, %0
   movlhps\t%0, %0"
  [(set_attr "type" "sselog1,ssemov")
   (set_attr "mode" "TI,V4SF")])
7505
 
7506
;; AVX pattern concatenating two SI values into a V2SI.
;;   alt 0: op1 in SSE reg, op2 in general reg or memory -> vpinsrd $1
;;   alt 1: both in SSE registers                        -> vpunpckldq
;;   alt 2: op2 matches "C"                              -> vmovd of op1
;;   alt 3: MMX registers, op1 tied to the destination   -> punpckldq
;;   alt 4: MMX destination, op2 matches "C"             -> movd of op1
;; The "prefix" attribute is "orig" for the two MMX alternatives (3, 4)
;; and "vex" for the rest.
(define_insn "*vec_concatv2si_avx"
  [(set (match_operand:V2SI 0 "register_operand"     "=x,x,x ,*y ,*y")
        (vec_concat:V2SI
          (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
          (match_operand:SI 2 "vector_move_operand"  "rm,x,C ,*ym,C")))]
  "TARGET_AVX"
  "@
   vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
   vpunpckldq\t{%2, %1, %0|%0, %1, %2}
   vmovd\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "prefix_extra" "1,*,*,*,*")
   (set_attr "length_immediate" "1,*,*,*,*")
   (set (attr "prefix")
     (if_then_else (eq_attr "alternative" "3,4")
       (const_string "orig")
       (const_string "vex")))
   (set_attr "mode" "TI,TI,TI,DI,DI")])
7526
 
7527
;; SSE4.1 pattern concatenating two SI values into a V2SI.
;;   alt 0: op1 tied to destination, op2 in general reg or memory -> pinsrd $1
;;   alt 1: op1 tied to destination, op2 in an SSE register       -> punpckldq
;;   alt 2: op2 matches "C"                                       -> movd of op1
;;   alt 3: MMX registers, op1 tied to the destination            -> punpckldq
;;   alt 4: MMX destination, op2 matches "C"                      -> movd of op1
(define_insn "*vec_concatv2si_sse4_1"
  [(set (match_operand:V2SI 0 "register_operand"     "=x,x,x ,*y ,*y")
        (vec_concat:V2SI
          (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
          (match_operand:SI 2 "vector_move_operand"  "rm,x,C ,*ym,C")))]
  "TARGET_SSE4_1"
  "@
   pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "prefix_extra" "1,*,*,*,*")
   (set_attr "length_immediate" "1,*,*,*,*")
   (set_attr "mode" "TI,TI,TI,DI,DI")])
7543
 
7544
;; ??? In theory we can match memory for the MMX alternative, but allowing
;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
;; alternatives pretty much forces the MMX alternative to be chosen.
;;
;; SSE2 pattern concatenating two SI values into a V2SI.
;;   alt 0: SSE registers, op1 tied to the destination -> punpckldq
;;   alt 1: SSE destination, op2 matches "C"           -> movd of op1
;;   alt 2: MMX registers, op1 tied to the destination -> punpckldq
;;   alt 3: MMX destination, op2 matches "C"           -> movd of op1
(define_insn "*vec_concatv2si_sse2"
  [(set (match_operand:V2SI 0 "register_operand"     "=x,x ,*y,*y")
        (vec_concat:V2SI
          (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
          (match_operand:SI 2 "reg_or_0_operand"     " x,C ,*y, C")))]
  "TARGET_SSE2"
  "@
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "TI,TI,DI,DI")])
7560
 
7561
;; SSE pattern concatenating two SI values into a V2SI.
;;   alt 0: SSE registers, op1 tied to the destination -> unpcklps
;;   alt 1: op1 in memory, op2 matches "C"             -> movss of op1
;;   alt 2: MMX registers, op1 tied to the destination -> punpckldq
;;   alt 3: MMX destination, op2 matches "C"           -> movd of op1
(define_insn "*vec_concatv2si_sse"
  [(set (match_operand:V2SI 0 "register_operand"     "=x,x,*y,*y")
        (vec_concat:V2SI
          (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
          (match_operand:SI 2 "reg_or_0_operand"     " x,C,*y,C")))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   movss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "V4SF,V4SF,DI,DI")])
7574
 
7575
;; AVX pattern concatenating two V2SI values into a V4SI.
;;   alt 0: both halves in SSE registers -> vpunpcklqdq
;;   alt 1: second half in memory        -> vmovhps
(define_insn "*vec_concatv4si_1_avx"
  [(set (match_operand:V4SI 0 "register_operand"       "=x,x")
        (vec_concat:V4SI
          (match_operand:V2SI 1 "register_operand"     " x,x")
          (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
  "TARGET_AVX"
  "@
   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog,ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI,V2SF")])
7587
 
7588
;; Pattern concatenating two V2SI values into a V4SI; operand 1 is tied
;; to the destination in every alternative.
;;   alt 0: "Y2" registers             -> punpcklqdq
;;   alt 1: second half in an "x" reg  -> movlhps
;;   alt 2: second half in memory      -> movhps
(define_insn "*vec_concatv4si_1"
  [(set (match_operand:V4SI 0 "register_operand"       "=Y2,x,x")
        (vec_concat:V4SI
          (match_operand:V2SI 1 "register_operand"     " 0 ,0,0")
          (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
  "TARGET_SSE"
  "@
   punpcklqdq\t{%2, %0|%0, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "mode" "TI,V4SF,V2SF")])
7600
 
7601
;; 32-bit AVX pattern concatenating two DI values into a V2DI.
;;   alt 0: op1 in memory, op2 matches "C"       -> vmovq of op1
;;   alt 1: op1 in an MMX register, op2 "C"      -> movq2dq
;;   alt 2: both in SSE registers                -> vpunpcklqdq
;;   alt 3: op2 in memory                        -> vmovhps
;; The "prefix" attribute is "orig" for alternative 1 and "vex" otherwise.
(define_insn "*vec_concatv2di_avx"
  [(set (match_operand:V2DI 0 "register_operand"     "=x,?x,x,x")
        (vec_concat:V2DI
          (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
          (match_operand:DI 2 "vector_move_operand"  " C, C,x,m")))]
  "!TARGET_64BIT && TARGET_AVX"
  "@
   vmovq\t{%1, %0|%0, %1}
   movq2dq\t{%1, %0|%0, %1}
   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
   (set (attr "prefix")
     (if_then_else (eq_attr "alternative" "1")
       (const_string "orig")
       (const_string "vex")))
   (set_attr "mode" "TI,TI,TI,V2SF")])
7618
 
7619
;; 32-bit pattern concatenating two DI values into a V2DI.
;;   alt 0: op1 in memory or a "Y2" register, op2 matches "C" -> movq of op1
;;   alt 1: op1 in an MMX register, op2 "C"                   -> movq2dq
;;   alt 2: op1 tied to destination, op2 in a "Y2" register   -> punpcklqdq
;;   alt 3: op1 tied to destination, op2 in an "x" register   -> movlhps
;;   alt 4: op1 tied to destination, op2 in memory            -> movhps
(define_insn "vec_concatv2di"
  [(set (match_operand:V2DI 0 "register_operand"     "=Y2 ,?Y2,Y2,x,x")
        (vec_concat:V2DI
          (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
          (match_operand:DI 2 "vector_move_operand"  " C  ,  C,Y2,x,m")))]
  "!TARGET_64BIT && TARGET_SSE"
  "@
   movq\t{%1, %0|%0, %1}
   movq2dq\t{%1, %0|%0, %1}
   punpcklqdq\t{%2, %0|%0, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
   (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
7633
 
7634
;; 64-bit AVX pattern concatenating two DI values into a V2DI.
;;   alt 0: op1 in SSE reg, op2 in general reg or memory -> vpinsrq $1
;;   alt 1: op1 in memory, op2 matches "C"               -> vmovq of op1
;;   alt 2: op1 in a general register, op2 "C"           -> vmovq of op1
;;   alt 3: op1 in an MMX register, op2 "C"              -> movq2dq
;;   alt 4: both in SSE registers                        -> vpunpcklqdq
;;   alt 5: op2 in memory                                -> vmovhps
;; The "prefix" attribute is "orig" for alternative 3 and "vex" otherwise.
(define_insn "*vec_concatv2di_rex64_avx"
  [(set (match_operand:V2DI 0 "register_operand"     "=x,x,Yi,!x,x,x")
        (vec_concat:V2DI
          (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
          (match_operand:DI 2 "vector_move_operand"  "rm,C,C ,C ,x,m")))]
  "TARGET_64BIT && TARGET_AVX"
  "@
   vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
   vmovq\t{%1, %0|%0, %1}
   vmovq\t{%1, %0|%0, %1}
   movq2dq\t{%1, %0|%0, %1}
   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
   (set_attr "prefix_extra" "1,*,*,*,*,*")
   (set_attr "length_immediate" "1,*,*,*,*,*")
   (set (attr "prefix")
     (if_then_else (eq_attr "alternative" "3")
       (const_string "orig")
       (const_string "vex")))
   (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
7655
 
7656
;; 64-bit SSE4.1 pattern concatenating two DI values into a V2DI.
;;   alt 0: op1 tied to destination, op2 in general reg or memory -> pinsrq $1
;;   alt 1: op1 in memory or SSE reg, op2 matches "C"             -> movq of op1
;;   alt 2: op1 in a general register, op2 "C"                    -> movq of op1
;;   alt 3: op1 in an MMX register, op2 "C"                       -> movq2dq
;;   alt 4: op1 tied to destination, op2 in SSE reg               -> punpcklqdq
;;   alt 5: op1 tied to destination, op2 in SSE reg               -> movlhps
;;   alt 6: op1 tied to destination, op2 in memory                -> movhps
(define_insn "*vec_concatv2di_rex64_sse4_1"
  [(set (match_operand:V2DI 0 "register_operand"     "=x ,x ,Yi,!x,x,x,x")
        (vec_concat:V2DI
          (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
          (match_operand:DI 2 "vector_move_operand"  " rm,C ,C ,C ,x,x,m")))]
  "TARGET_64BIT && TARGET_SSE4_1"
  "@
   pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
   movq\t{%1, %0|%0, %1}
   movq\t{%1, %0|%0, %1}
   movq2dq\t{%1, %0|%0, %1}
   punpcklqdq\t{%2, %0|%0, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
   (set_attr "prefix_rex" "1,*,1,*,*,*,*")
   (set_attr "prefix_extra" "1,*,*,*,*,*,*")
   (set_attr "length_immediate" "1,*,*,*,*,*,*")
   (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
7675
 
7676
;; 64-bit SSE pattern concatenating two DI values into a V2DI.
;;   alt 0: op1 in memory or a "Y2" register, op2 matches "C" -> movq of op1
;;   alt 1: op1 in a general register, op2 "C"                -> movq of op1
;;   alt 2: op1 in an MMX register, op2 "C"                   -> movq2dq
;;   alt 3: op1 tied to destination, op2 in a "Y2" register   -> punpcklqdq
;;   alt 4: op1 tied to destination, op2 in an "x" register   -> movlhps
;;   alt 5: op1 tied to destination, op2 in memory            -> movhps
(define_insn "*vec_concatv2di_rex64_sse"
  [(set (match_operand:V2DI 0 "register_operand"     "=Y2 ,Yi,!Y2,Y2,x,x")
        (vec_concat:V2DI
          (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
          (match_operand:DI 2 "vector_move_operand"  " C  ,C ,C  ,Y2,x,m")))]
  "TARGET_64BIT && TARGET_SSE"
  "@
   movq\t{%1, %0|%0, %1}
   movq\t{%1, %0|%0, %1}
   movq2dq\t{%1, %0|%0, %1}
   punpcklqdq\t{%2, %0|%0, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
   (set_attr "prefix_rex" "*,1,*,*,*,*")
   (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
7692
 
7693
;; Expander widening the V16QI in operand 1 into the V8HI in operand 0.
;; Delegates to ix86_expand_sse4_unpack when TARGET_SSE4_1 is set and to
;; ix86_expand_sse_unpack otherwise, both called with (operands, true, true).
;; NOTE(review): the two flags presumably mean unsigned_p and high_p (they
;; follow the "u" and "hi" parts of the pattern name) -- confirm against
;; the helper definitions.
(define_expand "vec_unpacku_hi_v16qi"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V16QI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  if (TARGET_SSE4_1)
    ix86_expand_sse4_unpack (operands, true, true);
  else
    ix86_expand_sse_unpack (operands, true, true);
  DONE;
})
7704
 
7705
;; Expander widening the V16QI in operand 1 into the V8HI in operand 0.
;; Delegates to ix86_expand_sse4_unpack when TARGET_SSE4_1 is set and to
;; ix86_expand_sse_unpack otherwise, both called with (operands, false, true).
;; NOTE(review): the two flags presumably mean unsigned_p and high_p (they
;; follow the "s" and "hi" parts of the pattern name) -- confirm against
;; the helper definitions.
(define_expand "vec_unpacks_hi_v16qi"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V16QI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  if (TARGET_SSE4_1)
    ix86_expand_sse4_unpack (operands, false, true);
  else
    ix86_expand_sse_unpack (operands, false, true);
  DONE;
})
7716
 
7717
;; Expander widening the V16QI in operand 1 into the V8HI in operand 0.
;; Delegates to ix86_expand_sse4_unpack when TARGET_SSE4_1 is set and to
;; ix86_expand_sse_unpack otherwise, both called with (operands, true, false).
;; NOTE(review): the two flags presumably mean unsigned_p and high_p (they
;; follow the "u" and "lo" parts of the pattern name) -- confirm against
;; the helper definitions.
(define_expand "vec_unpacku_lo_v16qi"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V16QI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  if (TARGET_SSE4_1)
    ix86_expand_sse4_unpack (operands, true, false);
  else
    ix86_expand_sse_unpack (operands, true, false);
  DONE;
})
7728
 
7729
;; Expander widening the V16QI in operand 1 into the V8HI in operand 0.
;; Delegates to ix86_expand_sse4_unpack when TARGET_SSE4_1 is set and to
;; ix86_expand_sse_unpack otherwise, both called with (operands, false, false).
;; NOTE(review): the two flags presumably mean unsigned_p and high_p (they
;; follow the "s" and "lo" parts of the pattern name) -- confirm against
;; the helper definitions.
(define_expand "vec_unpacks_lo_v16qi"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V16QI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  if (TARGET_SSE4_1)
    ix86_expand_sse4_unpack (operands, false, false);
  else
    ix86_expand_sse_unpack (operands, false, false);
  DONE;
})
7740
 
7741
;; Expander widening the V8HI in operand 1 into the V4SI in operand 0.
;; Delegates to ix86_expand_sse4_unpack when TARGET_SSE4_1 is set and to
;; ix86_expand_sse_unpack otherwise, both called with (operands, true, true).
;; NOTE(review): the two flags presumably mean unsigned_p and high_p (they
;; follow the "u" and "hi" parts of the pattern name) -- confirm against
;; the helper definitions.
(define_expand "vec_unpacku_hi_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  if (TARGET_SSE4_1)
    ix86_expand_sse4_unpack (operands, true, true);
  else
    ix86_expand_sse_unpack (operands, true, true);
  DONE;
})
7752
 
7753
;; Expander widening the V8HI in operand 1 into the V4SI in operand 0.
;; Delegates to ix86_expand_sse4_unpack when TARGET_SSE4_1 is set and to
;; ix86_expand_sse_unpack otherwise, both called with (operands, false, true).
;; NOTE(review): the two flags presumably mean unsigned_p and high_p (they
;; follow the "s" and "hi" parts of the pattern name) -- confirm against
;; the helper definitions.
(define_expand "vec_unpacks_hi_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  if (TARGET_SSE4_1)
    ix86_expand_sse4_unpack (operands, false, true);
  else
    ix86_expand_sse_unpack (operands, false, true);
  DONE;
})
7764
 
7765
;; Expander widening the V8HI in operand 1 into the V4SI in operand 0.
;; Delegates to ix86_expand_sse4_unpack when TARGET_SSE4_1 is set and to
;; ix86_expand_sse_unpack otherwise, both called with (operands, true, false).
;; NOTE(review): the two flags presumably mean unsigned_p and high_p (they
;; follow the "u" and "lo" parts of the pattern name) -- confirm against
;; the helper definitions.
(define_expand "vec_unpacku_lo_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  if (TARGET_SSE4_1)
    ix86_expand_sse4_unpack (operands, true, false);
  else
    ix86_expand_sse_unpack (operands, true, false);
  DONE;
})
7776
 
7777
;; Expander widening the V8HI in operand 1 into the V4SI in operand 0.
;; Delegates to ix86_expand_sse4_unpack when TARGET_SSE4_1 is set and to
;; ix86_expand_sse_unpack otherwise, both called with (operands, false, false).
;; NOTE(review): the two flags presumably mean unsigned_p and high_p (they
;; follow the "s" and "lo" parts of the pattern name) -- confirm against
;; the helper definitions.
(define_expand "vec_unpacks_lo_v8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  if (TARGET_SSE4_1)
    ix86_expand_sse4_unpack (operands, false, false);
  else
    ix86_expand_sse_unpack (operands, false, false);
  DONE;
})
7788
 
7789
;; Expander widening the V4SI in operand 1 into the V2DI in operand 0.
;; Delegates to ix86_expand_sse4_unpack when TARGET_SSE4_1 is set and to
;; ix86_expand_sse_unpack otherwise, both called with (operands, true, true).
;; NOTE(review): the two flags presumably mean unsigned_p and high_p (they
;; follow the "u" and "hi" parts of the pattern name) -- confirm against
;; the helper definitions.
(define_expand "vec_unpacku_hi_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  if (TARGET_SSE4_1)
    ix86_expand_sse4_unpack (operands, true, true);
  else
    ix86_expand_sse_unpack (operands, true, true);
  DONE;
})
7800
 
7801
;; Expander widening the V4SI in operand 1 into the V2DI in operand 0.
;; Delegates to ix86_expand_sse4_unpack when TARGET_SSE4_1 is set and to
;; ix86_expand_sse_unpack otherwise, both called with (operands, false, true).
;; NOTE(review): the two flags presumably mean unsigned_p and high_p (they
;; follow the "s" and "hi" parts of the pattern name) -- confirm against
;; the helper definitions.
(define_expand "vec_unpacks_hi_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  if (TARGET_SSE4_1)
    ix86_expand_sse4_unpack (operands, false, true);
  else
    ix86_expand_sse_unpack (operands, false, true);
  DONE;
})
7812
 
7813
;; Expander widening the V4SI in operand 1 into the V2DI in operand 0.
;; Delegates to ix86_expand_sse4_unpack when TARGET_SSE4_1 is set and to
;; ix86_expand_sse_unpack otherwise, both called with (operands, true, false).
;; NOTE(review): the two flags presumably mean unsigned_p and high_p (they
;; follow the "u" and "lo" parts of the pattern name) -- confirm against
;; the helper definitions.
(define_expand "vec_unpacku_lo_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  if (TARGET_SSE4_1)
    ix86_expand_sse4_unpack (operands, true, false);
  else
    ix86_expand_sse_unpack (operands, true, false);
  DONE;
})
7824
 
7825
;; Expander widening the V4SI in operand 1 into the V2DI in operand 0.
;; Delegates to ix86_expand_sse4_unpack when TARGET_SSE4_1 is set and to
;; ix86_expand_sse_unpack otherwise, both called with (operands, false, false).
;; NOTE(review): the two flags presumably mean unsigned_p and high_p (they
;; follow the "s" and "lo" parts of the pattern name) -- confirm against
;; the helper definitions.
(define_expand "vec_unpacks_lo_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")]
  "TARGET_SSE2"
{
  if (TARGET_SSE4_1)
    ix86_expand_sse4_unpack (operands, false, false);
  else
    ix86_expand_sse_unpack (operands, false, false);
  DONE;
})
7836
 
7837
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Miscellaneous
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7842
 
7843
;; Expander for the unsigned byte average with rounding.  The RTL
;; zero-extends both V16QI inputs to V16HI, adds them, adds a vector of
;; ones, shifts right by one and truncates back to V16QI.  The preparation
;; statement runs ix86_fixup_binary_operands_no_copy for PLUS on the
;; operands.
;; NOTE(review): the vector of ones is written with mode V16QI although it
;; is an operand of a V16HI plus.  The *avx_uavgv16qi3 and *sse2_uavgv16qi3
;; insns spell it the same way, so any change must be made in all three.
(define_expand "sse2_uavgv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "")
        (truncate:V16QI
          (lshiftrt:V16HI
            (plus:V16HI
              (plus:V16HI
                (zero_extend:V16HI
                  (match_operand:V16QI 1 "nonimmediate_operand" ""))
                (zero_extend:V16HI
                  (match_operand:V16QI 2 "nonimmediate_operand" "")))
              (const_vector:V16QI [(const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
7864
 
7865
;; AVX insn for the unsigned byte average with rounding: zero-extend both
;; V16QI inputs, add, add a vector of ones, shift right by one, truncate.
;; Operand 1 is marked commutative ("%"); output is three-operand vpavgb.
;; The RTL shape must stay identical to the sse2_uavgv16qi3 expander for
;; the pattern to match.
(define_insn "*avx_uavgv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (truncate:V16QI
          (lshiftrt:V16HI
            (plus:V16HI
              (plus:V16HI
                (zero_extend:V16HI
                  (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
                (zero_extend:V16HI
                  (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
              (const_vector:V16QI [(const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
  "vpavgb\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
7889
 
7890
;; SSE2 insn for the unsigned byte average with rounding: zero-extend both
;; V16QI inputs, add, add a vector of ones, shift right by one, truncate.
;; Operand 1 is commutative and tied to the destination ("%0"); output is
;; two-operand pavgb.  The RTL shape must stay identical to the
;; sse2_uavgv16qi3 expander for the pattern to match.
(define_insn "*sse2_uavgv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (truncate:V16QI
          (lshiftrt:V16HI
            (plus:V16HI
              (plus:V16HI
                (zero_extend:V16HI
                  (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
                (zero_extend:V16HI
                  (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
              (const_vector:V16QI [(const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
  "pavgb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
7914
 
7915
;; Expander for the unsigned 16-bit average with rounding.  The RTL
;; zero-extends both V8HI inputs to V8SI, adds them, adds a vector of
;; ones, shifts right by one and truncates back to V8HI.  The preparation
;; statement runs ix86_fixup_binary_operands_no_copy for PLUS on the
;; operands.
;; NOTE(review): the vector of ones is written with mode V8HI although it
;; is an operand of a V8SI plus.  The *avx_uavgv8hi3 and *sse2_uavgv8hi3
;; insns spell it the same way, so any change must be made in all three.
(define_expand "sse2_uavgv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (plus:V8SI
                (zero_extend:V8SI
                  (match_operand:V8HI 1 "nonimmediate_operand" ""))
                (zero_extend:V8SI
                  (match_operand:V8HI 2 "nonimmediate_operand" "")))
              (const_vector:V8HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
7932
 
7933
;; AVX insn for the unsigned 16-bit average with rounding: zero-extend
;; both V8HI inputs, add, add a vector of ones, shift right by one,
;; truncate.  Operand 1 is marked commutative ("%"); output is
;; three-operand vpavgw.  The RTL shape must stay identical to the
;; sse2_uavgv8hi3 expander for the pattern to match.
(define_insn "*avx_uavgv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (plus:V8SI
                (zero_extend:V8SI
                  (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
                (zero_extend:V8SI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
              (const_vector:V8HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
  "vpavgw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
7953
 
7954
;; SSE2 insn for the unsigned 16-bit average with rounding: zero-extend
;; both V8HI inputs, add, add a vector of ones, shift right by one,
;; truncate.  Operand 1 is commutative and tied to the destination ("%0");
;; output is two-operand pavgw.  The RTL shape must stay identical to the
;; sse2_uavgv8hi3 expander for the pattern to match.
(define_insn "*sse2_uavgv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (plus:V8SI
                (zero_extend:V8SI
                  (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
                (zero_extend:V8SI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
              (const_vector:V8HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
  "pavgw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
7974
 
7975
;; The correct representation for this is absolutely enormous, and
;; surely not generally useful.
;; Hence the operation is modelled as an opaque UNSPEC_PSADBW of two V16QI
;; inputs producing a V2DI.  This is the AVX three-operand form (vpsadbw).
(define_insn "*avx_psadbw"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
                      (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
                     UNSPEC_PSADBW))]
  "TARGET_AVX"
  "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
7987
 
7988
;; SSE2 two-operand psadbw, modelled as an opaque UNSPEC_PSADBW of two
;; V16QI inputs producing a V2DI.  Operand 1 is tied to the destination
;; register ("0").
(define_insn "sse2_psadbw"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
                      (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
                     UNSPEC_PSADBW))]
  "TARGET_SSE2"
  "psadbw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
7999
 
8000
;; 256-bit AVX sign-mask extraction, modelled as UNSPEC_MOVMSK of a vector
;; register into an SI general register.  The pattern is instantiated once
;; per mode of the AVX256MODEF2P iterator, so the name, the condition, the
;; mnemonic suffix and the "mode" attribute all need per-mode substitutions.
;; NOTE(review): the <...> substitutions had been stripped from this
;; pattern (leaving "(mode)", a bare "vmovmskp" mnemonic and an empty mode
;; attribute).  They are restored here from the upstream form; confirm the
;; attribute name avxmodesuffixf2c against the define_mode_attr
;; definitions earlier in this file.
(define_insn "avx_movmskp<avxmodesuffixf2c>256"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
          [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
          UNSPEC_MOVMSK))]
  "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
  "vmovmskp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])
8010
 
8011
;; 128-bit movmskps/movmskpd: produce an SImode value in a general register
;; from an SSE float vector register (UNSPEC_MOVMSK).  The "%v" in the
;; template together with the maybe_vex prefix attribute lets the same
;; pattern serve both the legacy and the VEX encoding.
;;
;; The pattern is instantiated once per mode of SSEMODEF2P, so the name,
;; mnemonic suffix, mode test and "mode" attribute are parameterised on the
;; iterator.
;; NOTE(review): <sse> and <ssemodesuffixf2c> are assumed to be the mode
;; attributes (defined outside this excerpt) giving the "sse"/"sse2" name
;; prefix and the "s"/"d" suffix -- confirm against the define_mode_attr
;; list.
(define_insn "<sse>_movmskp<ssemodesuffixf2c>"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
          [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
          UNSPEC_MOVMSK))]
  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
  "%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
8021
 
8022
;; pmovmskb on a 128-bit byte vector: SImode result in a general register
;; from a V16QI SSE register.  Shares UNSPEC_MOVMSK with the float
;; movmskp patterns; "%v" plus maybe_vex selects the VEX form when
;; appropriate.
(define_insn "sse2_pmovmskb"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
                   UNSPEC_MOVMSK))]
  "TARGET_SSE2"
  "%vpmovmskb\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])
8032
 
8033
;; Named expander for maskmovdqu.  It only builds the RTL: a store to the
;; V16QI memory operand 0 of an UNSPEC_MASKMOV over two V16QI registers
;; and the previous contents of that same memory (match_dup 0).  The empty
;; preparation string means no extra C code runs; matching is left to the
;; "*sse2_maskmovdqu" / "*sse2_maskmovdqu_rex64" insns.
(define_expand "sse2_maskmovdqu"
  [(set (match_operand:V16QI 0 "memory_operand" "")
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
                       (match_operand:V16QI 2 "register_operand" "")
                       (match_dup 0)]
                      UNSPEC_MASKMOV))]
  "TARGET_SSE2"
  "")
8041
 
8042
;; 32-bit maskmovdqu.  The destination address is operand 0, an SImode
;; register forced into the "D" (di) register class; it does not appear in
;; the output template because the instruction uses it implicitly.  The old
;; memory contents are an input of the unspec, so the store is modelled as
;; a read-modify-write of that location.
(define_insn "*sse2_maskmovdqu"
  [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
                       (match_operand:V16QI 2 "register_operand" "x")
                       (mem:V16QI (match_dup 0))]
                      UNSPEC_MASKMOV))]
  "TARGET_SSE2 && !TARGET_64BIT"
  ;; @@@ check ordering of operands in intel/nonintel syntax
  "%vmaskmovdqu\t{%2, %1|%1, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_data16" "1")
   ;; The implicit address operand (%edi here) confuses the default
   ;; length_vex computation, so the length is given explicitly.
   (set_attr "length_vex" "3")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
8057
 
8058
;; 64-bit maskmovdqu.  Identical to "*sse2_maskmovdqu" except that the
;; implicit address (operand 0, class "D") is DImode and the VEX length
;; depends on whether operand 2 is one of the REX-only SSE registers.
(define_insn "*sse2_maskmovdqu_rex64"
  [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
                       (match_operand:V16QI 2 "register_operand" "x")
                       (mem:V16QI (match_dup 0))]
                      UNSPEC_MASKMOV))]
  "TARGET_SSE2 && TARGET_64BIT"
  ;; @@@ check ordering of operands in intel/nonintel syntax
  "%vmaskmovdqu\t{%2, %1|%1, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_data16" "1")
   ;; The implicit %rdi operand confuses default length_vex computation.
   ;; The expression below yields 4 when operand 2 is at or above
   ;; FIRST_REX_SSE_REG and 3 otherwise.
   (set (attr "length_vex")
     (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 3 + 1 : 2 + 1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
8074
 
8075
;; ldmxcsr: consumes an SImode memory operand.  Modelled as an
;; unspec_volatile with no set, so it is never deleted, duplicated or
;; moved across other volatile operations.  The "memory" attribute is
;; "load" because the only memory access is the read of operand 0.
(define_insn "sse_ldmxcsr"
  [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
                    UNSPECV_LDMXCSR)]
  "TARGET_SSE"
  "%vldmxcsr\t%0"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "mxcsr")
   (set_attr "prefix" "maybe_vex")
   (set_attr "memory" "load")])
8084
 
8085
;; stmxcsr: writes an SImode memory operand.  The source is an
;; unspec_volatile with a dummy (const_int 0) argument, i.e. the stored
;; value comes from state that is not represented in RTL.  The "memory"
;; attribute is "store" to match the write to operand 0.
(define_insn "sse_stmxcsr"
  [(set (match_operand:SI 0 "memory_operand" "=m")
        (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
  "TARGET_SSE"
  "%vstmxcsr\t%0"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "mxcsr")
   (set_attr "prefix" "maybe_vex")
   (set_attr "memory" "store")])
8094
 
8095
;; Expander for sfence.  The caller supplies no operands; the preparation
;; code fabricates operand 0 as a volatile BLKmode MEM whose address is a
;; SCRATCH, and the pattern then "stores" an UNSPEC_SFENCE of that MEM back
;; into itself.  Using a volatile wildcard memory reference is what makes
;; the fence act as a barrier for memory accesses in the RTL optimizers.
(define_expand "sse_sfence"
  [(set (match_dup 0)
        (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})
8103
 
8104
;; Matcher for the RTL built by the "sse_sfence" expander.  Operand 0 has
;; an empty predicate, so any BLKmode expression is accepted.  No operand
;; is printed; length_address is forced to 0 because the MEM in the pattern
;; does not correspond to an encoded address.
(define_insn "*sse_sfence"
  [(set (match_operand:BLK 0 "" "")
        (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
  "sfence"
  [(set_attr "type" "sse")
   (set_attr "length_address" "0")
   (set_attr "atom_sse_attr" "fence")
   (set_attr "memory" "unknown")])
8113
 
8114
;; clflush: takes an address (predicate address_operand, constraint "p")
;; rather than a MEM, and prints it with the %a modifier.  Represented as
;; an unspec_volatile with no set; "memory" is "unknown" since the effect
;; on memory is not described in RTL.
(define_insn "sse2_clflush"
  [(unspec_volatile [(match_operand 0 "address_operand" "p")]
                    UNSPECV_CLFLUSH)]
  "TARGET_SSE2"
  "clflush\t%a0"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "fence")
   (set_attr "memory" "unknown")])
8122
 
8123
;; Expander for mfence.  Same scheme as the other fence expanders in this
;; file: operand 0 is created here as a volatile BLKmode MEM at a SCRATCH
;; address and is both the destination and the sole input of the
;; UNSPEC_MFENCE.
(define_expand "sse2_mfence"
  [(set (match_dup 0)
        (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
  "TARGET_SSE2"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})
8131
 
8132
;; Matcher for UNSPEC_MFENCE on a BLKmode location.  Note the condition is
;; wider than the expander's: it also accepts TARGET_64BIT without an
;; explicit TARGET_SSE2 test.  No operand is printed, hence
;; length_address 0.
(define_insn "*sse2_mfence"
  [(set (match_operand:BLK 0 "" "")
        (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
  "TARGET_64BIT || TARGET_SSE2"
  "mfence"
  [(set_attr "type" "sse")
   (set_attr "length_address" "0")
   (set_attr "atom_sse_attr" "fence")
   (set_attr "memory" "unknown")])
8141
 
8142
;; Expander for lfence.  Operand 0 is synthesised as a volatile BLKmode MEM
;; at a SCRATCH address and is used as both destination and input of the
;; UNSPEC_LFENCE.
(define_expand "sse2_lfence"
  [(set (match_dup 0)
        (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
  "TARGET_SSE2"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})
8150
 
8151
;; Matcher for UNSPEC_LFENCE on a BLKmode location.  Unlike the sfence and
;; mfence insns, the Atom scheduling attribute here is "lfence" rather
;; than "fence".  No operand is printed, hence length_address 0.
(define_insn "*sse2_lfence"
  [(set (match_operand:BLK 0 "" "")
        (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
  "TARGET_SSE2"
  "lfence"
  [(set_attr "type" "sse")
   (set_attr "length_address" "0")
   (set_attr "atom_sse_attr" "lfence")
   (set_attr "memory" "unknown")])
8160
 
8161
;; mwait.  Both inputs are SImode and pinned to fixed registers by their
;; constraints ("a" and "c"); the instruction reads them implicitly, so the
;; template prints no operands.  The encoded length is given directly.
(define_insn "sse3_mwait"
  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
                     (match_operand:SI 1 "register_operand" "c")]
                    UNSPECV_MWAIT)]
  "TARGET_SSE3"
;; The 64-bit version is "mwait %rax,%rcx", but only the lower 32 bits are
;; used.  Since 32-bit register operands are implicitly zero-extended to
;; 64 bits, we only need to set up the 32-bit registers.
  "mwait"
  [(set_attr "length" "3")])
8171
 
8172
;; 32-bit monitor.  All three inputs are SImode and pinned to fixed
;; registers by the "a", "c" and "d" constraints.  This variant prints the
;; operands explicitly; the 64-bit variant "sse3_monitor64" does not.
(define_insn "sse3_monitor"
  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
                     (match_operand:SI 1 "register_operand" "c")
                     (match_operand:SI 2 "register_operand" "d")]
                    UNSPECV_MONITOR)]
  "TARGET_SSE3 && !TARGET_64BIT"
  "monitor\t%0, %1, %2"
  [(set_attr "length" "3")])
8180
 
8181
;; 64-bit monitor.  Operand 0 (constraint "a") is DImode; operands 1 and 2
;; (constraints "c" and "d") remain SImode for the reason given in the
;; comment below.  The registers are implicit, so nothing is printed after
;; the mnemonic.
(define_insn "sse3_monitor64"
  [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
                     (match_operand:SI 1 "register_operand" "c")
                     (match_operand:SI 2 "register_operand" "d")]
                    UNSPECV_MONITOR)]
  "TARGET_SSE3 && TARGET_64BIT"
;; The 64-bit version is "monitor %rax,%rcx,%rdx", but only the lower
;; 32 bits of RCX and RDX are used.  Since 32-bit register operands are
;; implicitly zero-extended to 64 bits, we only need to set up the 32-bit
;; registers.
  "monitor"
  [(set_attr "length" "3")])
8192
 
8193
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8194
;;
8195
;; SSSE3 instructions
8196
;;
8197
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8198
 
8199
;; AVX horizontal add of 16-bit elements (vphaddw), 128-bit.
;; The result is spelled out element by element: each HImode result lane is
;; the sum of an adjacent (even, odd) pair.  The first V4HI half of the
;; vec_concat comes from operand 1, the second from operand 2.
;; Three-operand VEX form: operand 1 is a free "x" register.
(define_insn "*avx_phaddwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; Pairwise sums of operand 1: (0+1, 2+3, 4+5, 6+7).
          (vec_concat:V4HI
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "x")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Pairwise sums of operand 2: (0+1, 2+3, 4+5, 6+7).
          (vec_concat:V4HI
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_AVX"
  "vphaddw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
8242
 
8243
;; SSSE3 horizontal add of 16-bit elements (phaddw), 128-bit.
;; Each HImode result lane is the sum of an adjacent (even, odd) pair; the
;; first V4HI half comes from operand 1, the second from operand 2.
;; Two-operand encoding: operand 1 is tied to the destination ("0").
(define_insn "ssse3_phaddwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; Pairwise sums of operand 1: (0+1, 2+3, 4+5, 6+7).
          (vec_concat:V4HI
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Pairwise sums of operand 2: (0+1, 2+3, 4+5, 6+7).
          (vec_concat:V4HI
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "phaddw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
8287
 
8288
;; SSSE3 horizontal add of 16-bit elements (phaddw), 64-bit MMX form.
;; Operands use the "y" (MMX) register class; operand 1 is tied to the
;; destination.  Result lanes 0-1 are the pairwise sums of operand 1,
;; lanes 2-3 the pairwise sums of operand 2.
(define_insn "ssse3_phaddwv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; Pairwise sums of operand 1: (0+1, 2+3).
          (vec_concat:V2HI
            (plus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (plus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Pairwise sums of operand 2: (0+1, 2+3).
          (vec_concat:V2HI
            (plus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (plus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phaddw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   ;; REX prefix presence is computed from the actual insn operands.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
8316
 
8317
;; AVX horizontal add of 32-bit elements (vphaddd), 128-bit.
;; Result lanes 0-1 are the pairwise sums of operand 1, lanes 2-3 the
;; pairwise sums of operand 2.  Three-operand VEX form: operand 1 is a
;; free "x" register.
(define_insn "*avx_phadddv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          ;; Pairwise sums of operand 1: (0+1, 2+3).
          (vec_concat:V2SI
            (plus:SI
              (vec_select:SI
                (match_operand:V4SI 1 "register_operand" "x")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
            (plus:SI
              (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Pairwise sums of operand 2: (0+1, 2+3).
          (vec_concat:V2SI
            (plus:SI
              (vec_select:SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
            (plus:SI
              (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_AVX"
  "vphaddd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
8344
 
8345
;; SSSE3 horizontal add of 32-bit elements (phaddd), 128-bit.
;; Result lanes 0-1 are the pairwise sums of operand 1, lanes 2-3 the
;; pairwise sums of operand 2.  Two-operand encoding: operand 1 is tied to
;; the destination ("0").
(define_insn "ssse3_phadddv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          ;; Pairwise sums of operand 1: (0+1, 2+3).
          (vec_concat:V2SI
            (plus:SI
              (vec_select:SI
                (match_operand:V4SI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
            (plus:SI
              (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Pairwise sums of operand 2: (0+1, 2+3).
          (vec_concat:V2SI
            (plus:SI
              (vec_select:SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
            (plus:SI
              (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phaddd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
8373
 
8374
;; SSSE3 horizontal add of 32-bit elements (phaddd), 64-bit MMX form.
;; Result lane 0 is the sum of operand 1's two elements, lane 1 the sum of
;; operand 2's two elements.  Operands use the "y" (MMX) register class and
;; operand 1 is tied to the destination.
(define_insn "ssse3_phadddv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_concat:V2SI
          ;; Lane 0: op1[0] + op1[1].
          (plus:SI
            (vec_select:SI
              (match_operand:V2SI 1 "register_operand" "0")
              (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
          ;; Lane 1: op2[0] + op2[1].
          (plus:SI
            (vec_select:SI
              (match_operand:V2SI 2 "nonimmediate_operand" "ym")
              (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSSE3"
  "phaddd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   ;; REX prefix presence is computed from the actual insn operands.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
8394
 
8395
;; AVX horizontal add of 16-bit elements with signed saturation
;; (vphaddsw), 128-bit.  Same lane layout as the non-saturating phaddw
;; patterns, but every pairwise sum uses ss_plus instead of plus.
;; Three-operand VEX form: operand 1 is a free "x" register.
(define_insn "*avx_phaddswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; Saturating pairwise sums of operand 1: (0+1, 2+3, 4+5, 6+7).
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "x")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Saturating pairwise sums of operand 2: (0+1, 2+3, 4+5, 6+7).
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_AVX"
  "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
8438
 
8439
;; SSSE3 horizontal add of 16-bit elements with signed saturation
;; (phaddsw), 128-bit.  Every pairwise sum uses ss_plus.  The first V4HI
;; half comes from operand 1, the second from operand 2.
;; Two-operand encoding: operand 1 is tied to the destination ("0").
(define_insn "ssse3_phaddswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; Saturating pairwise sums of operand 1: (0+1, 2+3, 4+5, 6+7).
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Saturating pairwise sums of operand 2: (0+1, 2+3, 4+5, 6+7).
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "phaddsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
8483
 
8484
;; SSSE3 horizontal add of 16-bit elements with signed saturation
;; (phaddsw), 64-bit MMX form.  Operands use the "y" (MMX) register class;
;; operand 1 is tied to the destination.  Result lanes 0-1 come from
;; operand 1, lanes 2-3 from operand 2, each an ss_plus of an adjacent
;; pair.
(define_insn "ssse3_phaddswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; Saturating pairwise sums of operand 1: (0+1, 2+3).
          (vec_concat:V2HI
            (ss_plus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (ss_plus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Saturating pairwise sums of operand 2: (0+1, 2+3).
          (vec_concat:V2HI
            (ss_plus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (ss_plus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phaddsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   ;; REX prefix presence is computed from the actual insn operands.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
8512
 
8513
;; AVX horizontal subtract of 16-bit elements (vphsubw), 128-bit.
;; Each HImode result lane is (even element) - (odd element) of an
;; adjacent pair.  The first V4HI half comes from operand 1, the second
;; from operand 2.  Three-operand VEX form: operand 1 is a free "x"
;; register.
(define_insn "*avx_phsubwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; Pairwise differences of operand 1: (0-1, 2-3, 4-5, 6-7).
          (vec_concat:V4HI
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "x")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Pairwise differences of operand 2: (0-1, 2-3, 4-5, 6-7).
          (vec_concat:V4HI
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_AVX"
  "vphsubw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
8556
 
8557
;; SSSE3 horizontal subtract of 16-bit elements (phsubw), 128-bit.
;; Each HImode result lane is (even element) - (odd element) of an
;; adjacent pair.  The first V4HI half comes from operand 1, the second
;; from operand 2.  Two-operand encoding: operand 1 is tied to the
;; destination ("0").
(define_insn "ssse3_phsubwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          ;; Pairwise differences of operand 1: (0-1, 2-3, 4-5, 6-7).
          (vec_concat:V4HI
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Pairwise differences of operand 2: (0-1, 2-3, 4-5, 6-7).
          (vec_concat:V4HI
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "phsubw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
8601
 
8602
;; SSSE3 horizontal subtract of 16-bit elements (phsubw), 64-bit MMX form.
;; Operands use the "y" (MMX) register class; operand 1 is tied to the
;; destination.  Result lanes 0-1 are the pairwise differences of
;; operand 1, lanes 2-3 those of operand 2.
(define_insn "ssse3_phsubwv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; Pairwise differences of operand 1: (0-1, 2-3).
          (vec_concat:V2HI
            (minus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (minus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Pairwise differences of operand 2: (0-1, 2-3).
          (vec_concat:V2HI
            (minus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (minus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phsubw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   ;; REX prefix presence is computed from the actual insn operands.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
8630
 
8631
;; AVX horizontal subtract of 32-bit elements (vphsubd), 128-bit.
;; Result lanes 0-1 are the pairwise differences of operand 1, lanes 2-3
;; those of operand 2.  Three-operand VEX form: operand 1 is a free "x"
;; register.
(define_insn "*avx_phsubdv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          ;; Pairwise differences of operand 1: (0-1, 2-3).
          (vec_concat:V2SI
            (minus:SI
              (vec_select:SI
                (match_operand:V4SI 1 "register_operand" "x")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
            (minus:SI
              (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Pairwise differences of operand 2: (0-1, 2-3).
          (vec_concat:V2SI
            (minus:SI
              (vec_select:SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
            (minus:SI
              (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_AVX"
  "vphsubd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
8658
 
8659
;; SSSE3 horizontal subtract of 32-bit elements (phsubd), 128-bit.
;; Result lanes 0-1 are the pairwise differences of operand 1, lanes 2-3
;; those of operand 2.  Two-operand encoding: operand 1 is tied to the
;; destination ("0").
(define_insn "ssse3_phsubdv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          ;; Pairwise differences of operand 1: (0-1, 2-3).
          (vec_concat:V2SI
            (minus:SI
              (vec_select:SI
                (match_operand:V4SI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
            (minus:SI
              (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Pairwise differences of operand 2: (0-1, 2-3).
          (vec_concat:V2SI
            (minus:SI
              (vec_select:SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
            (minus:SI
              (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phsubd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
8687
 
8688
;; SSSE3 horizontal subtract of 32-bit elements (phsubd), 64-bit MMX form.
;; Result lane 0 is op1[0] - op1[1], lane 1 is op2[0] - op2[1].  Operands
;; use the "y" (MMX) register class and operand 1 is tied to the
;; destination.
(define_insn "ssse3_phsubdv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_concat:V2SI
          ;; Lane 0: op1[0] - op1[1].
          (minus:SI
            (vec_select:SI
              (match_operand:V2SI 1 "register_operand" "0")
              (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
          ;; Lane 1: op2[0] - op2[1].
          (minus:SI
            (vec_select:SI
              (match_operand:V2SI 2 "nonimmediate_operand" "ym")
              (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSSE3"
  "phsubd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   ;; REX prefix presence is computed from the actual insn operands.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
8708
 
8709
;; VEX three-operand PHSUBSW on V8HI: signed-saturating horizontal subtract.
;; Result lanes 0-3 are the differences of adjacent element pairs of
;; operand 1; result lanes 4-7 are the same reduction over operand 2.
(define_insn "*avx_phsubswv8hi3"
  [(set
     (match_operand:V8HI 0 "register_operand" "=x")
     (vec_concat:V8HI
       ;; Lanes 0-3, taken from operand 1.
       (vec_concat:V4HI
         (vec_concat:V2HI
           (ss_minus:HI
             (vec_select:HI (match_operand:V8HI 1 "register_operand" "x")
                            (parallel [(const_int 0)]))
             (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
           (ss_minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                        (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
         (vec_concat:V2HI
           (ss_minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                        (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
           (ss_minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                        (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
       ;; Lanes 4-7, taken from operand 2.
       (vec_concat:V4HI
         (vec_concat:V2HI
           (ss_minus:HI
             (vec_select:HI (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                            (parallel [(const_int 0)]))
             (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
           (ss_minus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                        (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
         (vec_concat:V2HI
           (ss_minus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                        (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
           (ss_minus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                        (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_AVX"
  "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
8752
 
8753
;; Two-operand PHSUBSW on V8HI: signed-saturating horizontal subtract.
;; Operand 1 is tied to the destination.  Result lanes 0-3 come from
;; adjacent pairs of operand 1, lanes 4-7 from adjacent pairs of operand 2.
(define_insn "ssse3_phsubswv8hi3"
  [(set
     (match_operand:V8HI 0 "register_operand" "=x")
     (vec_concat:V8HI
       ;; Lanes 0-3, taken from operand 1.
       (vec_concat:V4HI
         (vec_concat:V2HI
           (ss_minus:HI
             (vec_select:HI (match_operand:V8HI 1 "register_operand" "0")
                            (parallel [(const_int 0)]))
             (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
           (ss_minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                        (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
         (vec_concat:V2HI
           (ss_minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                        (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
           (ss_minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                        (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
       ;; Lanes 4-7, taken from operand 2.
       (vec_concat:V4HI
         (vec_concat:V2HI
           (ss_minus:HI
             (vec_select:HI (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                            (parallel [(const_int 0)]))
             (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
           (ss_minus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                        (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
         (vec_concat:V2HI
           (ss_minus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                        (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
           (ss_minus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                        (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "phsubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
8797
 
8798
;; PHSUBSW on V4HI operands ("y" constraints): signed-saturating horizontal
;; subtract.  Result lanes 0-1 come from adjacent pairs of operand 1 (tied
;; to the destination), lanes 2-3 from adjacent pairs of operand 2.
(define_insn "ssse3_phsubswv4hi3"
  [(set
     (match_operand:V4HI 0 "register_operand" "=y")
     (vec_concat:V4HI
       ;; Lanes 0-1, taken from operand 1.
       (vec_concat:V2HI
         (ss_minus:HI
           (vec_select:HI (match_operand:V4HI 1 "register_operand" "0")
                          (parallel [(const_int 0)]))
           (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
         (ss_minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                      (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
       ;; Lanes 2-3, taken from operand 2.
       (vec_concat:V2HI
         (ss_minus:HI
           (vec_select:HI (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                          (parallel [(const_int 0)]))
           (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
         (ss_minus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                      (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phsubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
8826
 
8827
;; VEX three-operand PMADDUBSW on 128-bit operands.  HImode result lane i is
;; the signed-saturating sum
;;   zext (op1[2i]) * sext (op2[2i]) + zext (op1[2i+1]) * sext (op2[2i+1]).
;;
;; Every vec_select below picks eight QImode elements out of a V16QI
;; operand, so its own mode has to be V8QI.  Three of the four selects used
;; to be written as V4QI or V16QI, which matched neither the length of the
;; selection vector nor the V8HI extension wrapped around them.
(define_insn "*avx_pmaddubsw128"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (ss_plus:V8HI
          ;; Products of the even-numbered bytes.
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 1 "register_operand" "x")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)])))
            (sign_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)]))))
          ;; Products of the odd-numbered bytes.
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)])))
            (sign_extend:V8HI
              (vec_select:V8QI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)]))))))]
  "TARGET_AVX"
  "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
8880
 
8881
;; Two-operand PMADDUBSW on 128-bit operands; operand 1 is tied to the
;; destination.  HImode result lane i is the signed-saturating sum
;;   zext (op1[2i]) * sext (op2[2i]) + zext (op1[2i+1]) * sext (op2[2i+1]).
;;
;; Every vec_select below picks eight QImode elements out of a V16QI
;; operand, so its own mode has to be V8QI.  Three of the four selects used
;; to be written as V4QI or V16QI, which matched neither the length of the
;; selection vector nor the V8HI extension wrapped around them.
(define_insn "ssse3_pmaddubsw128"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (ss_plus:V8HI
          ;; Products of the even-numbered bytes.
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 1 "register_operand" "0")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)])))
            (sign_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)]))))
          ;; Products of the odd-numbered bytes.
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)])))
            (sign_extend:V8HI
              (vec_select:V8QI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)]))))))]
  "TARGET_SSSE3"
  "pmaddubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
8935
 
8936
;; PMADDUBSW on 64-bit V8QI operands ("y" constraints); operand 1 is tied
;; to the destination.  HImode result lane i is the signed-saturating sum
;;   zext (op1[2i]) * sext (op2[2i]) + zext (op1[2i+1]) * sext (op2[2i+1]).
;;
;; Every vec_select below picks four QImode elements out of a V8QI operand,
;; so its own mode has to be V4QI.  The two odd-byte selects used to be
;; written as V8QI, which matched neither the length of the selection
;; vector nor the V4HI extension wrapped around them.
(define_insn "ssse3_pmaddubsw"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (ss_plus:V4HI
          ;; Products of the even-numbered bytes.
          (mult:V4HI
            (zero_extend:V4HI
              (vec_select:V4QI
                (match_operand:V8QI 1 "register_operand" "0")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)])))
            (sign_extend:V4HI
              (vec_select:V4QI
                (match_operand:V8QI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)]))))
          ;; Products of the odd-numbered bytes.
          (mult:V4HI
            (zero_extend:V4HI
              (vec_select:V4QI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4HI
              (vec_select:V4QI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))))]
  "TARGET_SSSE3"
  "pmaddubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
8974
 
8975
;; Expander for the V8HI rounding high multiply.  Per lane the pattern is
;;   truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1).
;; The preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy before the insn is emitted.
;; NOTE(review): the rounding constant is spelled in V8HI mode although it is
;; an operand of a V8SI plus; it is kept as-is so this expander and the
;; insn patterns that match its output stay textually in step.
(define_expand "ssse3_pmulhrswv8hi3"
  [(set
     (match_operand:V8HI 0 "register_operand" "")
     (truncate:V8HI
       (lshiftrt:V8SI
         (plus:V8SI
           (lshiftrt:V8SI
             (mult:V8SI
               (sign_extend:V8SI (match_operand:V8HI 1 "nonimmediate_operand" ""))
               (sign_extend:V8SI (match_operand:V8HI 2 "nonimmediate_operand" "")))
             (const_int 14))
           (const_vector:V8HI [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
                               (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
         (const_int 1))))]
  "TARGET_SSSE3"
  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
8994
 
8995
;; VEX three-operand matcher for the V8HI rounding high multiply:
;;   truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1) per lane.
;; The "%" on operand 1 marks operands 1 and 2 as commutative; the insn
;; condition additionally requires ix86_binary_operator_ok.
(define_insn "*avx_pmulhrswv8hi3"
  [(set
     (match_operand:V8HI 0 "register_operand" "=x")
     (truncate:V8HI
       (lshiftrt:V8SI
         (plus:V8SI
           (lshiftrt:V8SI
             (mult:V8SI
               (sign_extend:V8SI (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
               (sign_extend:V8SI (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
             (const_int 14))
           (const_vector:V8HI [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
                               (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
         (const_int 1))))]
  "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
9018
 
9019
;; Two-operand matcher for the V8HI rounding high multiply:
;;   truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1) per lane.
;; Operand 1 is tied to the destination and, through "%", may be swapped
;; with operand 2; the insn condition requires ix86_binary_operator_ok.
(define_insn "*ssse3_pmulhrswv8hi3"
  [(set
     (match_operand:V8HI 0 "register_operand" "=x")
     (truncate:V8HI
       (lshiftrt:V8SI
         (plus:V8SI
           (lshiftrt:V8SI
             (mult:V8SI
               (sign_extend:V8SI (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
               (sign_extend:V8SI (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
             (const_int 14))
           (const_vector:V8HI [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
                               (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
         (const_int 1))))]
  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmulhrsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
9042
 
9043
;; Expander for the V4HI rounding high multiply.  Per lane the pattern is
;;   truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1).
;; The preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy before the insn is emitted.
;; NOTE(review): the rounding constant is spelled in V4HI mode although it is
;; an operand of a V4SI plus; it is kept as-is so this expander and the
;; insn pattern that matches its output stay textually in step.
(define_expand "ssse3_pmulhrswv4hi3"
  [(set
     (match_operand:V4HI 0 "register_operand" "")
     (truncate:V4HI
       (lshiftrt:V4SI
         (plus:V4SI
           (lshiftrt:V4SI
             (mult:V4SI
               (sign_extend:V4SI (match_operand:V4HI 1 "nonimmediate_operand" ""))
               (sign_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "")))
             (const_int 14))
           (const_vector:V4HI
             [(const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
         (const_int 1))))]
  "TARGET_SSSE3"
  "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
9060
 
9061
;; Matcher for the V4HI rounding high multiply on "y"-constrained operands:
;;   truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1) per lane.
;; Operand 1 is tied to the destination and, through "%", may be swapped
;; with operand 2; the insn condition requires ix86_binary_operator_ok.
(define_insn "*ssse3_pmulhrswv4hi3"
  [(set
     (match_operand:V4HI 0 "register_operand" "=y")
     (truncate:V4HI
       (lshiftrt:V4SI
         (plus:V4SI
           (lshiftrt:V4SI
             (mult:V4SI
               (sign_extend:V4SI (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
               (sign_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
             (const_int 14))
           (const_vector:V4HI
             [(const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
         (const_int 1))))]
  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
  "pmulhrsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
9082
 
9083
;; VEX three-operand PSHUFB on V16QI.  The operation is opaque to the RTL:
;; it is modelled as UNSPEC_PSHUFB applied to operands 1 and 2.
(define_insn "*avx_pshufbv16qi3"
  [(set
     (match_operand:V16QI 0 "register_operand" "=x")
     (unspec:V16QI
       [(match_operand:V16QI 1 "register_operand" "x")
        (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
       UNSPEC_PSHUFB))]
  "TARGET_AVX"
  "vpshufb\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
9094
 
9095
;; Two-operand PSHUFB on V16QI; operand 1 is tied to the destination.
;; Modelled as UNSPEC_PSHUFB applied to operands 1 and 2.
(define_insn "ssse3_pshufbv16qi3"
  [(set
     (match_operand:V16QI 0 "register_operand" "=x")
     (unspec:V16QI
       [(match_operand:V16QI 1 "register_operand" "0")
        (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
       UNSPEC_PSHUFB))]
  "TARGET_SSSE3"
  "pshufb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
9106
 
9107
;; PSHUFB on V8QI operands ("y" constraints); operand 1 is tied to the
;; destination.  Modelled as UNSPEC_PSHUFB applied to operands 1 and 2.
(define_insn "ssse3_pshufbv8qi3"
  [(set
     (match_operand:V8QI 0 "register_operand" "=y")
     (unspec:V8QI
       [(match_operand:V8QI 1 "register_operand" "0")
        (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
       UNSPEC_PSHUFB))]
  "TARGET_SSSE3"
  "pshufb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
9118
 
9119
;; VEX three-operand PSIGN for every mode of the SSEMODE124 iterator,
;; modelled as UNSPEC_PSIGN applied to operands 1 and 2.
;; NOTE(review): this copy of the file had lost its `<...>` iterator
;; substitutions (they look to have been stripped as markup), leaving a
;; name without a mode and a mnemonic without an element-size suffix.
;; `<mode>` and `<ssevecsize>` are restored from the upstream GCC 4.5
;; spelling -- confirm against a pristine checkout.
(define_insn "*avx_psign<mode>3"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (unspec:SSEMODE124
          [(match_operand:SSEMODE124 1 "register_operand" "x")
           (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
          UNSPEC_PSIGN))]
  "TARGET_AVX"
  "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
9131
 
9132
;; Two-operand PSIGN for every mode of the SSEMODE124 iterator; operand 1
;; is tied to the destination.  Modelled as UNSPEC_PSIGN.
;; NOTE(review): this copy of the file had lost its `<...>` iterator
;; substitutions (they look to have been stripped as markup).  Without
;; `<mode>` the name collided with the MMXMODEI pattern of the same stem,
;; and the mnemonic had no element-size suffix.  `<mode>` and
;; `<ssevecsize>` are restored from the upstream GCC 4.5 spelling --
;; confirm against a pristine checkout.
(define_insn "ssse3_psign<mode>3"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (unspec:SSEMODE124
          [(match_operand:SSEMODE124 1 "register_operand" "0")
           (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
          UNSPEC_PSIGN))]
  "TARGET_SSSE3"
  "psign<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
9144
 
9145
;; PSIGN for every mode of the MMXMODEI iterator ("y" constraints);
;; operand 1 is tied to the destination.  Modelled as UNSPEC_PSIGN.
;; NOTE(review): this copy of the file had lost its `<...>` iterator
;; substitutions (they look to have been stripped as markup).  Without
;; `<mode>` the name collided with the SSEMODE124 pattern of the same stem,
;; and the mnemonic had no element-size suffix.  `<mode>` and
;; `<mmxvecsize>` are restored from the upstream GCC 4.5 spelling --
;; confirm against a pristine checkout.
(define_insn "ssse3_psign<mode>3"
  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
        (unspec:MMXMODEI
          [(match_operand:MMXMODEI 1 "register_operand" "0")
           (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
          UNSPEC_PSIGN))]
  "TARGET_SSSE3"
  "psign<mmxvecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
9157
 
9158
;; VEX three-operand PALIGNR on TImode values, modelled as UNSPEC_PALIGNR
;; over operands 1-3.  Operand 3 must satisfy const_0_to_255_mul_8_operand;
;; the output code rewrites it to operand 3 / 8 before printing.
(define_insn "*avx_palignrti"
  [(set
     (match_operand:TI 0 "register_operand" "=x")
     (unspec:TI
       [(match_operand:TI 1 "register_operand" "x")
        (match_operand:TI 2 "nonimmediate_operand" "xm")
        (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
       UNSPEC_PALIGNR))]
  "TARGET_AVX"
{
  /* Scale the RTL-level shift amount down by 8 for the assembler operand.  */
  const HOST_WIDE_INT rtl_shift = INTVAL (operands[3]);

  operands[3] = GEN_INT (rtl_shift / 8);
  return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "sseishft")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
9174
 
9175
;; Two-operand PALIGNR on TImode values; operand 1 is tied to the
;; destination.  Modelled as UNSPEC_PALIGNR over operands 1-3.  Operand 3
;; must satisfy const_0_to_255_mul_8_operand; the output code rewrites it
;; to operand 3 / 8 before printing.
(define_insn "ssse3_palignrti"
  [(set
     (match_operand:TI 0 "register_operand" "=x")
     (unspec:TI
       [(match_operand:TI 1 "register_operand" "0")
        (match_operand:TI 2 "nonimmediate_operand" "xm")
        (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
       UNSPEC_PALIGNR))]
  "TARGET_SSSE3"
{
  /* Scale the RTL-level shift amount down by 8 for the assembler operand.  */
  const HOST_WIDE_INT rtl_shift = INTVAL (operands[3]);

  operands[3] = GEN_INT (rtl_shift / 8);
  return "palignr\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sseishft")
   (set_attr "atom_unit" "sishuf")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
9192
 
9193
;; PALIGNR on DImode values ("y" constraints); operand 1 is tied to the
;; destination.  Modelled as UNSPEC_PALIGNR over operands 1-3.  Operand 3
;; must satisfy const_0_to_255_mul_8_operand; the output code rewrites it
;; to operand 3 / 8 before printing.
(define_insn "ssse3_palignrdi"
  [(set
     (match_operand:DI 0 "register_operand" "=y")
     (unspec:DI
       [(match_operand:DI 1 "register_operand" "0")
        (match_operand:DI 2 "nonimmediate_operand" "ym")
        (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
       UNSPEC_PALIGNR))]
  "TARGET_SSSE3"
{
  /* Scale the RTL-level shift amount down by 8 for the assembler operand.  */
  const HOST_WIDE_INT rtl_shift = INTVAL (operands[3]);

  operands[3] = GEN_INT (rtl_shift / 8);
  return "palignr\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sseishft")
   (set_attr "atom_unit" "sishuf")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
9210
 
9211
;; Element-wise absolute value (PABS) for every mode of the SSEMODE124
;; iterator.  The template starts with "%v" and the "prefix" attribute is
;; "maybe_vex".
;; NOTE(review): this copy of the file had lost its `<...>` iterator
;; substitutions (they look to have been stripped as markup).  Without
;; `<mode>` the name collided with the MMXMODEI pattern of the same stem,
;; and the mnemonic had no element-size suffix.  `<mode>` and
;; `<ssevecsize>` are restored from the upstream GCC 4.5 spelling --
;; confirm against a pristine checkout.
(define_insn "abs<mode>2"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (abs:SSEMODE124
          (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSSE3"
  "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
9221
 
9222
;; Element-wise absolute value (PABS) for every mode of the MMXMODEI
;; iterator ("y" constraints).
;; NOTE(review): this copy of the file had lost its `<...>` iterator
;; substitutions (they look to have been stripped as markup).  Without
;; `<mode>` the name collided with the SSEMODE124 pattern of the same stem,
;; and the mnemonic had no element-size suffix.  `<mode>` and
;; `<mmxvecsize>` are restored from the upstream GCC 4.5 spelling --
;; confirm against a pristine checkout.
(define_insn "abs<mode>2"
  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
        (abs:MMXMODEI
          (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
  "TARGET_SSSE3"
  "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_rep" "0")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
9232
 
9233
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9234
;;
9235
;; AMD SSE4A instructions
9236
;;
9237
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9238
 
9239
;; SSE4A MOVNTSS/MOVNTSD: store of a scalar MODEF register operand to
;; memory, modelled as UNSPEC_MOVNT.
;; NOTE(review): this copy of the file had lost its `<...>` iterator
;; substitutions (they look to have been stripped as markup), leaving a
;; mode-less name, a truncated mnemonic and an empty "mode" attribute.
;; `<mode>`, `<ssemodefsuffix>` and `<MODE>` are restored from the upstream
;; GCC 4.5 spelling -- confirm against a pristine checkout.
(define_insn "sse4a_movnt<mode>"
  [(set (match_operand:MODEF 0 "memory_operand" "=m")
        (unspec:MODEF
          [(match_operand:MODEF 1 "register_operand" "x")]
          UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "<MODE>")])
9248
 
9249
;; SSE4A MOVNTSS/MOVNTSD taking its source as element 0 of an SSEMODEF2P
;; vector register; the store is modelled as UNSPEC_MOVNT.
;; NOTE(review): this copy of the file had lost its `<...>` iterator
;; substitutions (they look to have been stripped as markup), leaving
;; `match_operand:`, `unspec:` and `vec_select:` with no mode at all, a
;; truncated mnemonic and an empty "mode" attribute.  The scalar mode
;; `<ssescalarmode>`, `<mode>` and `<ssemodesuffixf2c>` are restored from
;; the upstream GCC 4.5 spelling -- confirm against a pristine checkout.
(define_insn "sse4a_vmmovnt<mode>"
  [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
        (unspec:<ssescalarmode>
          [(vec_select:<ssescalarmode>
             (match_operand:SSEMODEF2P 1 "register_operand" "x")
             (parallel [(const_int 0)]))]
          UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "<ssescalarmode>")])
9260
 
9261
;; SSE4A EXTRQ, immediate form: operands 2 and 3 are integer constants and
;; operand 1 is tied to the destination.  Modelled as UNSPEC_EXTRQI; the
;; two immediates are accounted for by length_immediate = 2.
(define_insn "sse4a_extrqi"
  [(set
     (match_operand:V2DI 0 "register_operand" "=x")
     (unspec:V2DI
       [(match_operand:V2DI 1 "register_operand" "0")
        (match_operand 2 "const_int_operand" "")
        (match_operand 3 "const_int_operand" "")]
       UNSPEC_EXTRQI))]
  "TARGET_SSE4A"
  "extrq\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sse")
   (set_attr "prefix_data16" "1")
   (set_attr "length_immediate" "2")
   (set_attr "mode" "TI")])
9273
 
9274
;; SSE4A EXTRQ, register form: operand 2 is a V16QI register and operand 1
;; is tied to the destination.  Modelled as UNSPEC_EXTRQ.
(define_insn "sse4a_extrq"
  [(set
     (match_operand:V2DI 0 "register_operand" "=x")
     (unspec:V2DI
       [(match_operand:V2DI 1 "register_operand" "0")
        (match_operand:V16QI 2 "register_operand" "x")]
       UNSPEC_EXTRQ))]
  "TARGET_SSE4A"
  "extrq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
9284
 
9285
;; SSE4A INSERTQ, immediate form: operands 3 and 4 are integer constants,
;; operand 2 is a V2DI register and operand 1 is tied to the destination.
;; Modelled as UNSPEC_INSERTQI; the two immediates are accounted for by
;; length_immediate = 2.
(define_insn "sse4a_insertqi"
  [(set
     (match_operand:V2DI 0 "register_operand" "=x")
     (unspec:V2DI
       [(match_operand:V2DI 1 "register_operand" "0")
        (match_operand:V2DI 2 "register_operand" "x")
        (match_operand 3 "const_int_operand" "")
        (match_operand 4 "const_int_operand" "")]
       UNSPEC_INSERTQI))]
  "TARGET_SSE4A"
  "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
  [(set_attr "type" "sseins")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "1")
   (set_attr "length_immediate" "2")
   (set_attr "mode" "TI")])
9299
 
9300
;; SSE4A INSERTQ, register form: operand 2 is a V2DI register and operand 1
;; is tied to the destination.  Modelled as UNSPEC_INSERTQ.
(define_insn "sse4a_insertq"
  [(set
     (match_operand:V2DI 0 "register_operand" "=x")
     (unspec:V2DI
       [(match_operand:V2DI 1 "register_operand" "0")
        (match_operand:V2DI 2 "register_operand" "x")]
       UNSPEC_INSERTQ))]
  "TARGET_SSE4A"
  "insertq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseins")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "TI")])
9311
 
9312
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Intel SSE4.1 instructions
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9317
 
9318
;; VEX three-operand VBLENDPS/VBLENDPD for every mode of the AVXMODEF2P
;; iterator, expressed as a vec_merge of operand 2 and operand 1 under the
;; immediate mask in operand 3.
;; NOTE(review): this copy of the file had lost its `<...>` iterator
;; substitutions (they look to have been stripped as markup), leaving a
;; shared insn name, the nonexistent predicate "const_0_to__operand", a
;; truncated mnemonic and an empty "mode" attribute.  The substitutions
;; below are restored from the upstream GCC 4.5 spelling -- confirm against
;; a pristine checkout.
(define_insn "avx_blendp<avxmodesuffixf2c><avxmodesuffix>"
  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
        (vec_merge:AVXMODEF2P
          (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
          (match_operand:AVXMODEF2P 1 "register_operand" "x")
          (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
  "TARGET_AVX"
  "vblendp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<avxvecmode>")])
9331
 
9332
;; VEX four-operand VBLENDVPS/VBLENDVPD for every mode of the AVXMODEF2P
;; iterator; operand 3 is a register.  Modelled as UNSPEC_BLENDV.
;; NOTE(review): this copy of the file had lost its `<...>` iterator
;; substitutions (they look to have been stripped as markup), leaving a
;; shared insn name, a truncated mnemonic and an empty "mode" attribute.
;; The substitutions below are restored from the upstream GCC 4.5 spelling
;; -- confirm against a pristine checkout.
(define_insn "avx_blendvp<avxmodesuffixf2c><avxmodesuffix>"
  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
        (unspec:AVXMODEF2P
          [(match_operand:AVXMODEF2P 1 "register_operand" "x")
           (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
           (match_operand:AVXMODEF2P 3 "register_operand" "x")]
          UNSPEC_BLENDV))]
  "TARGET_AVX"
  "vblendvp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<avxvecmode>")])
9346
 
9347
;; Two-operand BLENDPS/BLENDPD for every mode of the SSEMODEF2P iterator,
;; expressed as a vec_merge of operand 2 and operand 1 (tied to the
;; destination) under the immediate mask in operand 3.
;; NOTE(review): this copy of the file had lost its `<...>` iterator
;; substitutions (they look to have been stripped as markup), leaving a
;; shared insn name, the nonexistent predicate "const_0_to__operand", a
;; truncated mnemonic and an empty "mode" attribute.  The substitutions
;; below are restored from the upstream GCC 4.5 spelling -- confirm against
;; a pristine checkout.
(define_insn "sse4_1_blendp<ssemodesuffixf2c>"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
        (vec_merge:SSEMODEF2P
          (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
          (match_operand:SSEMODEF2P 1 "register_operand" "0")
          (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
  "TARGET_SSE4_1"
  "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "<MODE>")])
9360
 
9361
;; Two-operand BLENDVPS/BLENDVPD for every mode of the SSEMODEF2P iterator.
;; Operand 3 carries the "Yz" constraint, while operands 0-2 use the
;; *_not_xmm0_operand predicates; operand 1 is tied to the destination.
;; Modelled as UNSPEC_BLENDV.
;; NOTE(review): this copy of the file had lost its `<...>` iterator
;; substitutions (they look to have been stripped as markup), leaving a
;; shared insn name, a truncated mnemonic and an empty "mode" attribute.
;; The substitutions below are restored from the upstream GCC 4.5 spelling
;; -- confirm against a pristine checkout.
(define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
  [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
        (unspec:SSEMODEF2P
          [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
           (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
           (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
          UNSPEC_BLENDV))]
  "TARGET_SSE4_1"
  "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "<MODE>")])
9374
 
9375
;; VEX three-operand VDPPS/VDPPD for every mode of the AVXMODEF2P iterator,
;; with an 8-bit immediate in operand 3.  Operands 1 and 2 are marked
;; commutative ("%").  Modelled as UNSPEC_DP.
;; NOTE(review): this copy of the file had lost its `<...>` iterator
;; substitutions (they look to have been stripped as markup), leaving a
;; shared insn name, a truncated mnemonic and an empty "mode" attribute.
;; The substitutions below are restored from the upstream GCC 4.5 spelling
;; -- confirm against a pristine checkout.
(define_insn "avx_dpp<avxmodesuffixf2c><avxmodesuffix>"
  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
        (unspec:AVXMODEF2P
          [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
           (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_DP))]
  "TARGET_AVX"
  "vdpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemul")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "<avxvecmode>")])
9389
 
9390
;; Two-operand DPPS/DPPD for every mode of the SSEMODEF2P iterator, with an
;; 8-bit immediate in operand 3.  Operand 1 is tied to the destination and
;; marked commutative with operand 2 ("%").  Modelled as UNSPEC_DP.
;; NOTE(review): this copy of the file had lost its `<...>` iterator
;; substitutions (they look to have been stripped as markup), leaving a
;; shared insn name, a truncated mnemonic and an empty "mode" attribute.
;; The substitutions below are restored from the upstream GCC 4.5 spelling
;; -- confirm against a pristine checkout.
(define_insn "sse4_1_dpp<ssemodesuffixf2c>"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
        (unspec:SSEMODEF2P
          [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
           (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_DP))]
  "TARGET_SSE4_1"
  "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemul")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "<MODE>")])
9404
 
9405
;; MOVNTDQA: V2DI load from a memory operand into a register, modelled as
;; UNSPEC_MOVNTDQA.  The template starts with "%v" and the "prefix"
;; attribute is "maybe_vex".
(define_insn "sse4_1_movntdqa"
  [(set
     (match_operand:V2DI 0 "register_operand" "=x")
     (unspec:V2DI
       [(match_operand:V2DI 1 "memory_operand" "m")]
       UNSPEC_MOVNTDQA))]
  "TARGET_SSE4_1"
  "%vmovntdqa\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
9415
 
9416
;; VEX three-operand MPSADBW on V16QI with an 8-bit immediate in operand 3.
;; Modelled as UNSPEC_MPSADBW.
(define_insn "*avx_mpsadbw"
  [(set
     (match_operand:V16QI 0 "register_operand" "=x")
     (unspec:V16QI
       [(match_operand:V16QI 1 "register_operand" "x")
        (match_operand:V16QI 2 "nonimmediate_operand" "xm")
        (match_operand:SI 3 "const_0_to_255_operand" "n")]
       UNSPEC_MPSADBW))]
  "TARGET_AVX"
  "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
9429
 
9430
;; Two-operand MPSADBW on V16QI with an 8-bit immediate in operand 3;
;; operand 1 is tied to the destination.  Modelled as UNSPEC_MPSADBW.
(define_insn "sse4_1_mpsadbw"
  [(set
     (match_operand:V16QI 0 "register_operand" "=x")
     (unspec:V16QI
       [(match_operand:V16QI 1 "register_operand" "0")
        (match_operand:V16QI 2 "nonimmediate_operand" "xm")
        (match_operand:SI 3 "const_0_to_255_operand" "n")]
       UNSPEC_MPSADBW))]
  "TARGET_SSE4_1"
  "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
9442
 
9443
;; AVX form of packusdw: each V4SI input is truncated with unsigned
;; saturation to V4HI, and the two halves are concatenated into a V8HI
;; result (operand 1 first, operand 2 second).
(define_insn "*avx_packusdw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          (us_truncate:V4HI (match_operand:V4SI 1 "register_operand" "x"))
          (us_truncate:V4HI (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
  "TARGET_AVX"
  "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
9456
 
9457
;; SSE4.1 form of packusdw: same RTL as the AVX pattern, but operand 1
;; must live in the destination register ("0" constraint).
(define_insn "sse4_1_packusdw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          (us_truncate:V4HI (match_operand:V4SI 1 "register_operand" "0"))
          (us_truncate:V4HI (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
  "TARGET_SSE4_1"
  "packusdw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
9469
 
9470
;; AVX form of pblendvb: a UNSPEC_BLENDV of two V16QI data operands
;; (1 and 2) and a V16QI register selector (operand 3).  All four
;; operands are explicit in the output template.
(define_insn "*avx_pblendvb"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (unspec:V16QI
          [(match_operand:V16QI 1 "register_operand" "x")
           (match_operand:V16QI 2 "nonimmediate_operand" "xm")
           (match_operand:V16QI 3 "register_operand" "x")]
          UNSPEC_BLENDV))]
  "TARGET_AVX"
  "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
9483
 
9484
;; SSE4.1 form of pblendvb.  Operand 1 is tied to the destination, and
;; operands 0-2 use the *_not_xmm0 predicates while the selector
;; (operand 3) is restricted by the "Yz" constraint.
;; NOTE(review): "Yz" is presumably the xmm0-only register class -- confirm
;; against the constraint definitions.
(define_insn "sse4_1_pblendvb"
  [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
        (unspec:V16QI
          [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
           (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
           (match_operand:V16QI 3 "register_operand" "Yz")]
          UNSPEC_BLENDV))]
  "TARGET_SSE4_1"
  "pblendvb\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
9495
 
9496
;; AVX form of pblendw, expressed as a vec_merge: the immediate operand 3
;; (0..255) is the merge mask choosing between operand 2 and operand 1.
(define_insn "*avx_pblendw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_merge:V8HI
          (match_operand:V8HI 2 "nonimmediate_operand" "xm")
          (match_operand:V8HI 1 "register_operand" "x")
          (match_operand:SI 3 "const_0_to_255_operand" "n")))]
  "TARGET_AVX"
  "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
9509
 
9510
;; SSE4.1 form of pblendw: same vec_merge as the AVX pattern, with
;; operand 1 tied to the destination register.
(define_insn "sse4_1_pblendw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_merge:V8HI
          (match_operand:V8HI 2 "nonimmediate_operand" "xm")
          (match_operand:V8HI 1 "register_operand" "0")
          (match_operand:SI 3 "const_0_to_255_operand" "n")))]
  "TARGET_SSE4_1"
  "pblendw\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
9522
 
9523
;; phminposuw: operand 0 = UNSPEC_PHMINPOSUW of the V8HI
;; register-or-memory operand 1.
(define_insn "sse4_1_phminposuw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (unspec:V8HI
          [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
          UNSPEC_PHMINPOSUW))]
  "TARGET_SSE4_1"
  "%vphminposuw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
9533
 
9534
;; pmovsxbw, register source: sign-extend elements 0-7 of the V16QI
;; register operand 1 into the V8HI destination.
(define_insn "sse4_1_extendv8qiv8hi2"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (sign_extend:V8HI
          (vec_select:V8QI
            (match_operand:V16QI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_SSE4_1"
  "%vpmovsxbw\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
9553
 
9554
;; pmovsxbw, register-or-memory source: operand 1 is a V8QI value
;; wrapped in vec_duplicate:V16QI; elements 0-7 of that are sign-extended
;; into the V8HI destination.
(define_insn "*sse4_1_extendv8qiv8hi2"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (sign_extend:V8HI
          (vec_select:V8QI
            (vec_duplicate:V16QI
              (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_SSE4_1"
  "%vpmovsxbw\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
9574
 
9575
;; pmovsxbd, register source: sign-extend elements 0-3 of the V16QI
;; register operand 1 into the V4SI destination.
(define_insn "sse4_1_extendv4qiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (sign_extend:V4SI
          (vec_select:V4QI
            (match_operand:V16QI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1"
  "%vpmovsxbd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
9590
 
9591
;; pmovsxbd, register-or-memory source: operand 1 is a V4QI value
;; wrapped in vec_duplicate:V16QI; elements 0-3 are sign-extended into
;; the V4SI destination.
(define_insn "*sse4_1_extendv4qiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (sign_extend:V4SI
          (vec_select:V4QI
            (vec_duplicate:V16QI
              (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1"
  "%vpmovsxbd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
9607
 
9608
;; pmovsxbq, register source: sign-extend elements 0-1 of the V16QI
;; register operand 1 into the V2DI destination.
(define_insn "sse4_1_extendv2qiv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (sign_extend:V2DI
          (vec_select:V2QI
            (match_operand:V16QI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "%vpmovsxbq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
9621
 
9622
;; pmovsxbq, register-or-memory source: operand 1 is a V2QI value
;; wrapped in vec_duplicate:V16QI; elements 0-1 are sign-extended into
;; the V2DI destination.
(define_insn "*sse4_1_extendv2qiv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (sign_extend:V2DI
          (vec_select:V2QI
            (vec_duplicate:V16QI
              (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "%vpmovsxbq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
9636
 
9637
;; pmovsxwd, register source: sign-extend elements 0-3 of the V8HI
;; register operand 1 into the V4SI destination.
(define_insn "sse4_1_extendv4hiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (sign_extend:V4SI
          (vec_select:V4HI
            (match_operand:V8HI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1"
  "%vpmovsxwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
9652
 
9653
;; pmovsxwd, register-or-memory source: operand 1 is a V4HI value
;; wrapped in vec_duplicate:V8HI; elements 0-3 are sign-extended into
;; the V4SI destination.
;; The source operand is V4HI (four HI elements are selected), matching
;; the zero-extending *sse4_1_zero_extendv4hiv4si2 pattern; a V2HI
;; operand would describe a source only half as wide as what is consumed.
(define_insn "*sse4_1_extendv4hiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (sign_extend:V4SI
          (vec_select:V4HI
            (vec_duplicate:V8HI
              (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1"
  "%vpmovsxwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
9669
 
9670
;; pmovsxwq, register source: sign-extend elements 0-1 of the V8HI
;; register operand 1 into the V2DI destination.
(define_insn "sse4_1_extendv2hiv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (sign_extend:V2DI
          (vec_select:V2HI
            (match_operand:V8HI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "%vpmovsxwq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
9683
 
9684
;; pmovsxwq, register-or-memory source: operand 1 is a V2HI value
;; wrapped in vec_duplicate:V8HI; elements 0-1 are sign-extended into
;; the V2DI destination.
;; The source operand is V2HI (two HI elements are selected), matching
;; the zero-extending *sse4_1_zero_extendv2hiv2di2 pattern; a V8HI
;; operand inside vec_duplicate:V8HI would describe a full 16-byte source.
(define_insn "*sse4_1_extendv2hiv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (sign_extend:V2DI
          (vec_select:V2HI
            (vec_duplicate:V8HI
              (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "%vpmovsxwq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
9698
 
9699
;; pmovsxdq, register source: sign-extend elements 0-1 of the V4SI
;; register operand 1 into the V2DI destination.
(define_insn "sse4_1_extendv2siv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (sign_extend:V2DI
          (vec_select:V2SI
            (match_operand:V4SI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "%vpmovsxdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
9712
 
9713
;; pmovsxdq, register-or-memory source: operand 1 is a V2SI value
;; wrapped in vec_duplicate:V4SI; elements 0-1 are sign-extended into
;; the V2DI destination.
(define_insn "*sse4_1_extendv2siv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (sign_extend:V2DI
          (vec_select:V2SI
            (vec_duplicate:V4SI
              (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "%vpmovsxdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
9727
 
9728
;; pmovzxbw, register source: zero-extend elements 0-7 of the V16QI
;; register operand 1 into the V8HI destination.
(define_insn "sse4_1_zero_extendv8qiv8hi2"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (zero_extend:V8HI
          (vec_select:V8QI
            (match_operand:V16QI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_SSE4_1"
  "%vpmovzxbw\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
9747
 
9748
;; pmovzxbw, register-or-memory source: operand 1 is a V8QI value
;; wrapped in vec_duplicate:V16QI; elements 0-7 are zero-extended into
;; the V8HI destination.
(define_insn "*sse4_1_zero_extendv8qiv8hi2"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (zero_extend:V8HI
          (vec_select:V8QI
            (vec_duplicate:V16QI
              (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_SSE4_1"
  "%vpmovzxbw\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
9768
 
9769
;; pmovzxbd, register source: zero-extend elements 0-3 of the V16QI
;; register operand 1 into the V4SI destination.
(define_insn "sse4_1_zero_extendv4qiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (zero_extend:V4SI
          (vec_select:V4QI
            (match_operand:V16QI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1"
  "%vpmovzxbd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
9784
 
9785
;; pmovzxbd, register-or-memory source: operand 1 is a V4QI value
;; wrapped in vec_duplicate:V16QI; elements 0-3 are zero-extended into
;; the V4SI destination.
(define_insn "*sse4_1_zero_extendv4qiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (zero_extend:V4SI
          (vec_select:V4QI
            (vec_duplicate:V16QI
              (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1"
  "%vpmovzxbd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
9801
 
9802
;; pmovzxbq, register source: zero-extend elements 0-1 of the V16QI
;; register operand 1 into the V2DI destination.
(define_insn "sse4_1_zero_extendv2qiv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (zero_extend:V2DI
          (vec_select:V2QI
            (match_operand:V16QI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "%vpmovzxbq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
9815
 
9816
;; pmovzxbq, register-or-memory source: operand 1 is a V2QI value
;; wrapped in vec_duplicate:V16QI; elements 0-1 are zero-extended into
;; the V2DI destination.
(define_insn "*sse4_1_zero_extendv2qiv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (zero_extend:V2DI
          (vec_select:V2QI
            (vec_duplicate:V16QI
              (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "%vpmovzxbq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
9830
 
9831
;; pmovzxwd, register source: zero-extend elements 0-3 of the V8HI
;; register operand 1 into the V4SI destination.
(define_insn "sse4_1_zero_extendv4hiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (zero_extend:V4SI
          (vec_select:V4HI
            (match_operand:V8HI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1"
  "%vpmovzxwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
9846
 
9847
;; pmovzxwd, register-or-memory source: operand 1 is a V4HI value
;; wrapped in vec_duplicate:V8HI; elements 0-3 are zero-extended into
;; the V4SI destination.
(define_insn "*sse4_1_zero_extendv4hiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (zero_extend:V4SI
          (vec_select:V4HI
            (vec_duplicate:V8HI
              (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1"
  "%vpmovzxwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
9863
 
9864
;; pmovzxwq, register source: zero-extend elements 0-1 of the V8HI
;; register operand 1 into the V2DI destination.
(define_insn "sse4_1_zero_extendv2hiv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (zero_extend:V2DI
          (vec_select:V2HI
            (match_operand:V8HI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "%vpmovzxwq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
9877
 
9878
;; pmovzxwq, register-or-memory source: operand 1 is a V2HI value
;; wrapped in vec_duplicate:V8HI; elements 0-1 are zero-extended into
;; the V2DI destination.
(define_insn "*sse4_1_zero_extendv2hiv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (zero_extend:V2DI
          (vec_select:V2HI
            (vec_duplicate:V8HI
              (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "%vpmovzxwq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
9892
 
9893
;; pmovzxdq, register source: zero-extend elements 0-1 of the V4SI
;; register operand 1 into the V2DI destination.
(define_insn "sse4_1_zero_extendv2siv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (zero_extend:V2DI
          (vec_select:V2SI
            (match_operand:V4SI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "%vpmovzxdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
9906
 
9907
;; pmovzxdq, register-or-memory source: operand 1 is a V2SI value
;; wrapped in vec_duplicate:V4SI; elements 0-1 are zero-extended into
;; the V2DI destination.
(define_insn "*sse4_1_zero_extendv2siv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (zero_extend:V2DI
          (vec_select:V2SI
            (vec_duplicate:V4SI
              (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "%vpmovzxdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
9921
 
9922
;; vtestps/vtestpd are very similar to comiss and ucomiss when setting
;; FLAGS_REG, but they are not really compare instructions: the result
;; is modelled as UNSPEC_VTESTP of the two operands.
;; The pattern is instantiated once per AVXMODEF2P mode, so the insn
;; name, the mnemonic suffix and the "mode" attribute must all be
;; mode-dependent; with a fixed name and an empty "mode" value every
;; instance would collide.
;; NOTE(review): <avxmodesuffixf2c> (s/d suffix) and <avxmodesuffix>
;; (""/"256") are assumed to be mode attributes defined earlier in this
;; file -- confirm before merging.
(define_insn "avx_vtestp<avxmodesuffixf2c><avxmodesuffix>"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_operand:AVXMODEF2P 0 "register_operand" "x")
           (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
          UNSPEC_VTESTP))]
  "TARGET_AVX"
  "vtestp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])
9935
 
9936
;; ptest behaves much like comiss and ucomiss in that it sets FLAGS_REG,
;; but it is not really a compare instruction; the flags result is
;; modelled as UNSPEC_PTEST of the two V4DI operands.
(define_insn "avx_ptest256"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_operand:V4DI 0 "register_operand" "x")
           (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
          UNSPEC_PTEST))]
  "TARGET_AVX"
  "vptest\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
9949
 
9950
;; 128-bit ptest: sets FLAGS_REG to UNSPEC_PTEST of the two V2DI operands
;; (register operand 0, register-or-memory operand 1).
(define_insn "sse4_1_ptest"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_operand:V2DI 0 "register_operand" "x")
           (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
          UNSPEC_PTEST))]
  "TARGET_SSE4_1"
  "%vptest\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
9961
 
9962
;; 256-bit vroundps/vroundpd: operand 0 = UNSPEC_ROUND of the
;; register-or-memory operand 1 and the 0..15 immediate operand 2.
;; The pattern is instantiated once per AVX256MODEF2P mode, so the insn
;; name, the mnemonic suffix and the "mode" attribute must be
;; mode-dependent; a fixed name and an empty "mode" value would collide.
;; NOTE(review): <avxmodesuffixf2c> is assumed to be the s/d suffix mode
;; attribute defined earlier in this file -- confirm before merging.
(define_insn "avx_roundp<avxmodesuffixf2c>256"
  [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
        (unspec:AVX256MODEF2P
          [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
           (match_operand:SI 2 "const_0_to_15_operand" "n")]
          UNSPEC_ROUND))]
  "TARGET_AVX"
  "vroundp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])
9975
 
9976
;; 128-bit roundps/roundpd: operand 0 = UNSPEC_ROUND of the
;; register-or-memory operand 1 and the 0..15 immediate operand 2.
;; The pattern is instantiated once per SSEMODEF2P mode, so the insn
;; name, the mnemonic suffix and the "mode" attribute must be
;; mode-dependent; a fixed name and an empty "mode" value would collide.
;; NOTE(review): <ssemodesuffixf2c> is assumed to be the s/d suffix mode
;; attribute defined earlier in this file -- confirm before merging.
(define_insn "sse4_1_roundp<ssemodesuffixf2c>"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
        (unspec:SSEMODEF2P
          [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
           (match_operand:SI 2 "const_0_to_15_operand" "n")]
          UNSPEC_ROUND))]
  "TARGET_ROUND"
  "%vroundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
9990
 
9991
;; AVX scalar round (vroundss/vroundsd): vec_merge with mask 1 takes
;; element 0 from UNSPEC_ROUND (operand 2, immediate operand 3) and the
;; remaining elements from operand 1.
;; The mnemonic suffix and the "mode" attribute depend on the SSEMODEF2P
;; mode being instantiated; without them the template has no valid
;; mnemonic and "mode" is empty.
;; NOTE(review): <ssemodesuffixf2c> is assumed to be the s/d suffix mode
;; attribute defined earlier in this file -- confirm before merging.
(define_insn "*avx_rounds<ssemodesuffixf2c>"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
        (vec_merge:SSEMODEF2P
          (unspec:SSEMODEF2P
            [(match_operand:SSEMODEF2P 2 "register_operand" "x")
             (match_operand:SI 3 "const_0_to_15_operand" "n")]
            UNSPEC_ROUND)
          (match_operand:SSEMODEF2P 1 "register_operand" "x")
          (const_int 1)))]
  "TARGET_AVX"
  "vrounds<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])
10007
 
10008
;; SSE4.1 scalar round (roundss/roundsd): vec_merge with mask 1 takes
;; element 0 from UNSPEC_ROUND (operand 2, immediate operand 3) and the
;; remaining elements from operand 1, which is tied to the destination.
;; The pattern is instantiated once per SSEMODEF2P mode, so the insn
;; name, the mnemonic suffix and the "mode" attribute must be
;; mode-dependent; a fixed name and an empty "mode" value would collide.
;; NOTE(review): <ssemodesuffixf2c> is assumed to be the s/d suffix mode
;; attribute defined earlier in this file -- confirm before merging.
(define_insn "sse4_1_rounds<ssemodesuffixf2c>"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
        (vec_merge:SSEMODEF2P
          (unspec:SSEMODEF2P
            [(match_operand:SSEMODEF2P 2 "register_operand" "x")
             (match_operand:SI 3 "const_0_to_15_operand" "n")]
            UNSPEC_ROUND)
          (match_operand:SSEMODEF2P 1 "register_operand" "0")
          (const_int 1)))]
  "TARGET_ROUND"
  "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "<MODE>")])
10024
 
10025
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10026
;;
10027
;; Intel SSE4.2 string/text processing instructions
10028
;;
10029
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10030
 
10031
;; Combined pcmpestr pattern producing all three results at once: the SI
;; index (operand 0, "c"), the V16QI mask (operand 1, "Yz") and the
;; flags.  It is only valid while pseudos can still be created and is
;; always split ("#" template, "&& 1" split condition) into whichever of
;; pcmpestri / pcmpestrm / the flags-only pattern is actually needed.
(define_insn_and_split "sse4_2_pcmpestr"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
        (unspec:SI
          [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
           (match_operand:SI 3 "register_operand" "a,a")
           (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
           (match_operand:SI 5 "register_operand" "d,d")
           (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPESTR))
   (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
        (unspec:V16QI
          [(match_dup 2) (match_dup 3) (match_dup 4)
           (match_dup 5) (match_dup 6)]
          UNSPEC_PCMPESTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 2) (match_dup 3) (match_dup 4)
           (match_dup 5) (match_dup 6)]
          UNSPEC_PCMPESTR))]
  "TARGET_SSE4_2
   && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  /* A result is needed unless the insn carries a REG_UNUSED note for
     the register that holds it.  */
  int index_live
    = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
  int mask_live
    = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
  int flags_live
    = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);

  /* Index form.  */
  if (index_live)
    emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
                                     operands[3], operands[4],
                                     operands[5], operands[6]));

  /* Mask form.  */
  if (mask_live)
    emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
                                     operands[3], operands[4],
                                     operands[5], operands[6]));

  /* Only the flags are wanted and neither form above was emitted.  */
  if (flags_live && !index_live && !mask_live)
    emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
                                           operands[2], operands[3],
                                           operands[4], operands[5],
                                           operands[6]));
  DONE;
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
10087
 
10088
;; pcmpestri: sets the SI index result (operand 0, "c" constraint) and
;; FLAGS_REG from UNSPEC_PCMPESTR.  The two length operands (2 and 4,
;; constraints "a" and "d") do not appear in the output template.
(define_insn "sse4_2_pcmpestri"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
        (unspec:SI
          [(match_operand:V16QI 1 "register_operand" "x,x")
           (match_operand:SI 2 "register_operand" "a,a")
           (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
           (match_operand:SI 4 "register_operand" "d,d")
           (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPESTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 1) (match_dup 2) (match_dup 3)
           (match_dup 4) (match_dup 5)]
          UNSPEC_PCMPESTR))]
  "TARGET_SSE4_2"
  "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
10114
 
10115
;; pcmpestrm: sets the V16QI mask result (operand 0, "Yz" constraint)
;; and FLAGS_REG from UNSPEC_PCMPESTR.  The two length operands (2 and 4,
;; constraints "a" and "d") do not appear in the output template.
(define_insn "sse4_2_pcmpestrm"
  [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
        (unspec:V16QI
          [(match_operand:V16QI 1 "register_operand" "x,x")
           (match_operand:SI 2 "register_operand" "a,a")
           (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
           (match_operand:SI 4 "register_operand" "d,d")
           (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPESTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 1) (match_dup 2) (match_dup 3)
           (match_dup 4) (match_dup 5)]
          UNSPEC_PCMPESTR))]
  "TARGET_SSE4_2"
  "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
10141
 
10142
;; Flags-only pcmpestr: only FLAGS_REG is set; the register results are
;; clobbered scratches.  Alternatives 0-1 clobber the "Yz" scratch and
;; emit pcmpestrm; alternatives 2-3 clobber the "c" scratch and emit
;; pcmpestri.  Within each pair the second alternative takes operand 4
;; from memory.
(define_insn "sse4_2_pcmpestr_cconly"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
           (match_operand:SI 3 "register_operand" "a,a,a,a")
           (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
           (match_operand:SI 5 "register_operand" "d,d,d,d")
           (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
          UNSPEC_PCMPESTR))
   (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
   (clobber (match_scratch:SI    1 "= X, X,c,c"))]
  "TARGET_SSE4_2"
  "@
   %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
   %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
   %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
   %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,load,none,load")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
10166
 
10167
;; Combined pcmpistr pattern producing all three results at once: the SI
;; index (operand 0, "c"), the V16QI mask (operand 1, "Yz") and the
;; flags.  It is only valid while pseudos can still be created and is
;; always split ("#" template, "&& 1" split condition) into whichever of
;; pcmpistri / pcmpistrm / the flags-only pattern is actually needed.
(define_insn_and_split "sse4_2_pcmpistr"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
        (unspec:SI
          [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
           (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
           (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPISTR))
   (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
        (unspec:V16QI
          [(match_dup 2) (match_dup 3) (match_dup 4)]
          UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 2) (match_dup 3) (match_dup 4)]
          UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2
   && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  /* A result is needed unless the insn carries a REG_UNUSED note for
     the register that holds it.  */
  int index_live
    = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
  int mask_live
    = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
  int flags_live
    = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);

  /* Index form.  */
  if (index_live)
    emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
                                     operands[3], operands[4]));

  /* Mask form.  */
  if (mask_live)
    emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
                                     operands[3], operands[4]));

  /* Only the flags are wanted and neither form above was emitted.  */
  if (flags_live && !index_live && !mask_live)
    emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
                                           operands[2], operands[3],
                                           operands[4]));
  DONE;
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
10214
 
10215
;; pcmpistri: sets the SI index result (operand 0, "c" constraint) and
;; FLAGS_REG from UNSPEC_PCMPISTR of the two V16QI operands and the
;; 0..255 immediate.
(define_insn "sse4_2_pcmpistri"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
        (unspec:SI
          [(match_operand:V16QI 1 "register_operand" "x,x")
           (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
           (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 1) (match_dup 2) (match_dup 3)]
          UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2"
  "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
10237
 
10238
;; pcmpistrm: operand 0 (V16QImode, register class "Yz") and FLAGS_REG are
;; both set from an UNSPEC_PCMPISTR over operands 1, 2 and the 0..255
;; immediate operand 3.  Alternative 0 takes operand 2 from a register,
;; alternative 1 from memory.
(define_insn "sse4_2_pcmpistrm"
  [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x,x")
                       (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
                       (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
                      UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC [(match_dup 1) (match_dup 2) (match_dup 3)]
                   UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2"
  "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
10260
 
10261
;; Flags-only form of the pcmpistr patterns: only FLAGS_REG is set; the
;; register result is a clobbered scratch.  Four alternatives:
;;   0,1 - emit pcmpistrm, scratch 0 in class "Yz" (operand 3 reg / mem)
;;   2,3 - emit pcmpistri, scratch 1 in class "c"  (operand 3 reg / mem)
;; The scratch that an alternative does not use has constraint "X".
(define_insn "sse4_2_pcmpistr_cconly"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
                    (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
                    (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
                   UNSPEC_PCMPISTR))
   (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
   (clobber (match_scratch:SI 1 "= X, X,c,c"))]
  "TARGET_SSE4_2"
  "@
   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,load,none,load")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
10283
 
10284
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10285
;;
10286
;; XOP instructions
10287
;;
10288
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10289
 
10290
;; XOP parallel integer multiply/add instructions.
10291
;; Note the XOP multiply/add instructions
10292
;;     a[i] = b[i] * c[i] + d[i];
10293
;; do not allow the value being added to be a memory operation.
10294
;; vpmacsww: op0 = op1 * op2 + op3, element-wise on V8HI with a
;; non-saturating plus.  Operands 1 and 2 are commutative ("%"); only
;; operand 2 has a memory constraint.
(define_insn "xop_pmacsww"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (plus:V8HI
          (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                     (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
          (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10305
 
10306
;; vpmacssww: op0 = op1 * op2 + op3, element-wise on V8HI, where the
;; addition is a signed-saturating ss_plus.  Operands 1 and 2 are
;; commutative ("%"); only operand 2 has a memory constraint.
(define_insn "xop_pmacssww"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (ss_plus:V8HI
          (mult:V8HI
            (match_operand:V8HI 1 "nonimmediate_operand" "%x")
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
          (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10316
 
10317
;; vpmacsdd: op0 = op1 * op2 + op3, element-wise on V4SI with a
;; non-saturating plus.  Operands 1 and 2 are commutative ("%"); only
;; operand 2 has a memory constraint.
(define_insn "xop_pmacsdd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                     (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10328
 
10329
;; vpmacssdd: op0 = op1 * op2 + op3, element-wise on V4SI, where the
;; addition is a signed-saturating ss_plus.  Operands 1 and 2 are
;; commutative ("%"); only operand 2 has a memory constraint.
(define_insn "xop_pmacssdd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (ss_plus:V4SI
          (mult:V4SI
            (match_operand:V4SI 1 "nonimmediate_operand" "%x")
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10339
 
10340
;; vpmacssdql: select elements 1 and 3 of the V4SI operands 1 and 2,
;; sign-extend each selection to V2DI, multiply, and add the V2DI operand 3
;; with signed saturation (ss_plus).
;;
;; Both multiplicands must be sign-extended to V2DI: a mult:V2DI may not
;; take a V2SI operand directly, and the sibling patterns (pmacssdqh,
;; pmacsdql, pmacsdqh) all extend both inputs.
(define_insn "xop_pmacssdql"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (ss_plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 1) (const_int 3)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 1) (const_int 3)]))))
          (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10358
 
10359
;; vpmacssdqh: select elements 0 and 2 of the V4SI operands 1 and 2,
;; sign-extend each selection to V2DI, multiply, and add the V2DI operand 3
;; with signed saturation (ss_plus).
(define_insn "xop_pmacssdqh"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (ss_plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 0) (const_int 2)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 2)]))))
          (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10378
 
10379
;; vpmacsdql: select elements 1 and 3 of the V4SI operands 1 and 2,
;; sign-extend each selection to V2DI, multiply, and add the V2DI operand 3
;; with a non-saturating plus.
(define_insn "xop_pmacsdql"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 1) (const_int 3)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 1) (const_int 3)]))))
          (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10398
 
10399
;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10400
;; fake it with a multiply/add.  In general, we expect the define_split to
10401
;; occur before register allocation, so we have to handle the corner case where
10402
;; the target is the same as operands 1/2
10403
;; Widening multiply of elements 1 and 3 of two V4SI operands into V2DI.
;; Emitted as "#" and split once reload has completed into two insns:
;;   1. operand 0 = 0 (operands[3] is set to CONST0_RTX (V2DImode));
;;   2. operand 0 = sext (op1[1,3]) * sext (op2[1,3]) + operand 0.
;; Operand 0 is earlyclobber ("=&x"), so it cannot share a register with
;; operand 1 or 2.
(define_insn_and_split "xop_mulv2div2di3_low"
  [(set (match_operand:V2DI 0 "register_operand" "=&x")
        (mult:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "register_operand" "%x")
              (parallel [(const_int 1) (const_int 3)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 1) (const_int 3)])))))]
  "TARGET_XOP"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 3))
   (set (match_dup 0)
        (plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 3)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_dup 2)
                (parallel [(const_int 1) (const_int 3)]))))
          (match_dup 0)))]
{
  /* The accumulator of the multiply/add starts out as all zeros.  */
  operands[3] = CONST0_RTX (V2DImode);
}
  [(set_attr "type" "ssemul")
   (set_attr "mode" "TI")])
10440
 
10441
;; vpmacsdqh: select elements 0 and 2 of the V4SI operands 1 and 2,
;; sign-extend each selection to V2DI, multiply, and add the V2DI operand 3
;; with a non-saturating plus.
(define_insn "xop_pmacsdqh"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 0) (const_int 2)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 2)]))))
          (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10460
 
10461
;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10462
;; fake it with a multiply/add.  In general, we expect the define_split to
10463
;; occur before register allocation, so we have to handle the corner case where
10464
;; the target is the same as either operands[1] or operands[2]
10465
;; Widening multiply of elements 0 and 2 of two V4SI operands into V2DI.
;; Emitted as "#" and split once reload has completed into two insns:
;;   1. operand 0 = 0 (operands[3] is set to CONST0_RTX (V2DImode));
;;   2. operand 0 = sext (op1[0,2]) * sext (op2[0,2]) + operand 0.
;; Operand 0 is earlyclobber ("=&x"), so it cannot share a register with
;; operand 1 or 2.
(define_insn_and_split "xop_mulv2div2di3_high"
  [(set (match_operand:V2DI 0 "register_operand" "=&x")
        (mult:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "register_operand" "%x")
              (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_XOP"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 3))
   (set (match_dup 0)
        (plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_dup 1)
                (parallel [(const_int 0) (const_int 2)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_dup 2)
                (parallel [(const_int 0) (const_int 2)]))))
          (match_dup 0)))]
{
  /* The accumulator of the multiply/add starts out as all zeros.  */
  operands[3] = CONST0_RTX (V2DImode);
}
  [(set_attr "type" "ssemul")
   (set_attr "mode" "TI")])
10502
 
10503
;; XOP parallel integer multiply/add instructions for the intrinsics
10504
;; vpmacsswd: select elements 1, 3, 5 and 7 of the V8HI operands 1 and 2,
;; sign-extend each selection to V4SI, multiply, and add the V4SI operand 3
;; with signed saturation (ss_plus).
(define_insn "xop_pmacsswd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (ss_plus:V4SI
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10527
 
10528
;; vpmacswd: select elements 1, 3, 5 and 7 of the V8HI operands 1 and 2,
;; sign-extend each selection to V4SI, multiply, and add the V4SI operand 3
;; with a non-saturating plus.
(define_insn "xop_pmacswd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10551
 
10552
;; vpmadcsswd: two widening products are formed from the V8HI operands 1
;; and 2 -- one from elements 0, 2, 4, 6 and one from elements 1, 3, 5, 7,
;; each sign-extended to V4SI.  The products are summed with a plain plus
;; and the V4SI operand 3 is then added with signed saturation (ss_plus).
(define_insn "xop_pmadcsswd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (ss_plus:V4SI
          (plus:V4SI
            (mult:V4SI
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                  (parallel [(const_int 0) (const_int 2)
                             (const_int 4) (const_int 6)])))
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0) (const_int 2)
                             (const_int 4) (const_int 6)]))))
            (mult:V4SI
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_dup 1)
                  (parallel [(const_int 1) (const_int 3)
                             (const_int 5) (const_int 7)])))
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_dup 2)
                  (parallel [(const_int 1) (const_int 3)
                             (const_int 5) (const_int 7)])))))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10591
 
10592
;; vpmadcswd: two widening products are formed from the V8HI operands 1
;; and 2 -- one from elements 0, 2, 4, 6 and one from elements 1, 3, 5, 7,
;; each sign-extended to V4SI.  The products are summed and the V4SI
;; operand 3 is added, all with a non-saturating plus.
(define_insn "xop_pmadcswd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (plus:V4SI
            (mult:V4SI
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                  (parallel [(const_int 0) (const_int 2)
                             (const_int 4) (const_int 6)])))
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0) (const_int 2)
                             (const_int 4) (const_int 6)]))))
            (mult:V4SI
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_dup 1)
                  (parallel [(const_int 1) (const_int 3)
                             (const_int 5) (const_int 7)])))
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_dup 2)
                  (parallel [(const_int 1) (const_int 3)
                             (const_int 5) (const_int 7)])))))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
10631
 
10632
;; XOP parallel XMM conditional moves
10633
;; vpcmov on the 16-byte vector modes: operand 0 is an if_then_else that
;; uses operand 3 as the condition and chooses between operand 1 and
;; operand 2.  Alternative 0 allows operand 2 in memory, alternative 1
;; allows operand 3 in memory.
;;
;; The pattern is instantiated once per mode of the SSEMODE iterator, so
;; the name must carry the <mode> suffix; without it every instance would
;; have the same name.
(define_insn "xop_pcmov_<mode>"
  [(set (match_operand:SSEMODE 0 "register_operand" "=x,x")
        (if_then_else:SSEMODE
          (match_operand:SSEMODE 3 "nonimmediate_operand" "x,m")
          (match_operand:SSEMODE 1 "vector_move_operand" "x,x")
          (match_operand:SSEMODE 2 "vector_move_operand" "xm,x")))]
  "TARGET_XOP"
  "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")])
10642
 
10643
;; vpcmov on the 32-byte vector modes: operand 0 is an if_then_else that
;; uses operand 3 as the condition and chooses between operand 1 and
;; operand 2.  Alternative 0 allows operand 2 in memory, alternative 1
;; allows operand 3 in memory.
;;
;; The pattern is instantiated once per mode of the AVX256MODE iterator,
;; so the name must carry the <mode> substitution; without it every
;; instance would have the same name.
(define_insn "xop_pcmov_<mode>256"
  [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
        (if_then_else:AVX256MODE
          (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,m")
          (match_operand:AVX256MODE 1 "vector_move_operand" "x,x")
          (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x")))]
  "TARGET_XOP"
  "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")])
10652
 
10653
;; XOP horizontal add/subtract instructions
10654
;; vphaddbw: the even-indexed and the odd-indexed bytes of the V16QI
;; operand 1 are each sign-extended to V8HI and added, so result element i
;; is byte 2i + byte 2i+1.
(define_insn "xop_phaddbw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (plus:V8HI
          (sign_extend:V8HI
            (vec_select:V8QI
              (match_operand:V16QI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (sign_extend:V8HI
            (vec_select:V8QI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)
                         (const_int 9) (const_int 11)
                         (const_int 13) (const_int 15)])))))]
  "TARGET_XOP"
  "vphaddbw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10682
 
10683
;; vphaddbd: four byte selections of the V16QI operand 1 (strides of four,
;; starting at bytes 0, 1, 2 and 3) are each sign-extended to V4SI and
;; added, so result element i is the sum of bytes 4i .. 4i+3.
(define_insn "xop_phaddbd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (plus:V4SI
            (sign_extend:V4SI
              (vec_select:V4QI
                (match_operand:V16QI 1 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 4)
                           (const_int 8) (const_int 12)])))
            (sign_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 5)
                           (const_int 9) (const_int 13)]))))
          (plus:V4SI
            (sign_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 2) (const_int 6)
                           (const_int 10) (const_int 14)])))
            (sign_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 3) (const_int 7)
                           (const_int 11) (const_int 15)]))))))]
  "TARGET_XOP"
  "vphaddbd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10719
 
10720
;; vphaddbq: eight two-byte selections of the V16QI operand 1 are each
;; sign-extended to V2DI and added.  Each selection is (k, k+8) for
;; k = 0..7, so result element 0 is the sum of bytes 0..7 and result
;; element 1 is the sum of bytes 8..15.
;;
;; The selections must pair byte k with byte k+8.  Pairing k with k+4
;; (as in the word-to-quadword pattern) would put bytes 0-3 and 8-11 into
;; element 0, which is not a sum over one contiguous quadword.
(define_insn "xop_phaddbq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (plus:V2DI
            (plus:V2DI
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_operand:V16QI 1 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0) (const_int 8)])))
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 1) (const_int 9)]))))
            (plus:V2DI
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 2) (const_int 10)])))
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 3) (const_int 11)])))))
          (plus:V2DI
            (plus:V2DI
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 4) (const_int 12)])))
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 5) (const_int 13)]))))
            (plus:V2DI
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 6) (const_int 14)])))
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 7) (const_int 15)])))))))]
  "TARGET_XOP"
  "vphaddbq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10772
 
10773
;; vphaddwd: the even-indexed and the odd-indexed words of the V8HI
;; operand 1 are each sign-extended to V4SI and added, so result element i
;; is word 2i + word 2i+1.
(define_insn "xop_phaddwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (sign_extend:V4SI
            (vec_select:V4HI
              (match_operand:V8HI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (sign_extend:V4SI
            (vec_select:V4HI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)])))))]
  "TARGET_XOP"
  "vphaddwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10793
 
10794
;; vphaddwq: four two-word selections of the V8HI operand 1 -- (0,4),
;; (1,5), (2,6), (3,7) -- are each sign-extended to V2DI and added, so
;; result element 0 is the sum of words 0..3 and element 1 the sum of
;; words 4..7.
(define_insn "xop_phaddwq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (plus:V2DI
            (sign_extend:V2DI
              (vec_select:V2HI
                (match_operand:V8HI 1 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 4)])))
            (sign_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 5)]))))
          (plus:V2DI
            (sign_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 2) (const_int 6)])))
            (sign_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 3) (const_int 7)]))))))]
  "TARGET_XOP"
  "vphaddwq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10822
 
10823
;; vphadddq: elements (0,2) and elements (1,3) of the V4SI operand 1 are
;; each sign-extended to V2DI and added, so result element i is
;; dword 2i + dword 2i+1.
(define_insn "xop_phadddq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)])))))]
  "TARGET_XOP"
  "vphadddq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10839
 
10840
;; vphaddubw: the even-indexed and the odd-indexed bytes of the V16QI
;; operand 1 are each zero-extended to V8HI and added, so result element i
;; is byte 2i + byte 2i+1.
(define_insn "xop_phaddubw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (plus:V8HI
          (zero_extend:V8HI
            (vec_select:V8QI
              (match_operand:V16QI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (zero_extend:V8HI
            (vec_select:V8QI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)
                         (const_int 9) (const_int 11)
                         (const_int 13) (const_int 15)])))))]
  "TARGET_XOP"
  "vphaddubw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10868
 
10869
;; vphaddubd: four byte selections of the V16QI operand 1 (strides of
;; four, starting at bytes 0, 1, 2 and 3) are each zero-extended to V4SI
;; and added, so result element i is the sum of bytes 4i .. 4i+3.
(define_insn "xop_phaddubd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (plus:V4SI
            (zero_extend:V4SI
              (vec_select:V4QI
                (match_operand:V16QI 1 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 4)
                           (const_int 8) (const_int 12)])))
            (zero_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 5)
                           (const_int 9) (const_int 13)]))))
          (plus:V4SI
            (zero_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 2) (const_int 6)
                           (const_int 10) (const_int 14)])))
            (zero_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 3) (const_int 7)
                           (const_int 11) (const_int 15)]))))))]
  "TARGET_XOP"
  "vphaddubd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10905
 
10906
;; vphaddubq: eight two-byte selections of the V16QI operand 1 are each
;; zero-extended to V2DI and added.  Each selection is (k, k+8) for
;; k = 0..7, so result element 0 is the sum of bytes 0..7 and result
;; element 1 is the sum of bytes 8..15.
;;
;; Every term is zero_extend: this is the unsigned form, so no term may be
;; sign-extended.  The selections pair byte k with byte k+8 so that each
;; result element sums one contiguous quadword of the source.
(define_insn "xop_phaddubq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (plus:V2DI
            (plus:V2DI
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_operand:V16QI 1 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0) (const_int 8)])))
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 1) (const_int 9)]))))
            (plus:V2DI
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 2) (const_int 10)])))
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 3) (const_int 11)])))))
          (plus:V2DI
            (plus:V2DI
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 4) (const_int 12)])))
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 5) (const_int 13)]))))
            (plus:V2DI
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 6) (const_int 14)])))
              (zero_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 7) (const_int 15)])))))))]
  "TARGET_XOP"
  "vphaddubq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10958
 
10959
;; vphadduwd: the even-indexed and the odd-indexed words of the V8HI
;; operand 1 are each zero-extended to V4SI and added, so result element i
;; is word 2i + word 2i+1.
(define_insn "xop_phadduwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (zero_extend:V4SI
            (vec_select:V4HI
              (match_operand:V8HI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (zero_extend:V4SI
            (vec_select:V4HI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)])))))]
  "TARGET_XOP"
  "vphadduwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
10979
 
10980
;; vphadduwq: four two-word selections of the V8HI operand 1 -- (0,4),
;; (1,5), (2,6), (3,7) -- are each zero-extended to V2DI and added, so
;; result element 0 is the sum of words 0..3 and element 1 the sum of
;; words 4..7.
(define_insn "xop_phadduwq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (plus:V2DI
            (zero_extend:V2DI
              (vec_select:V2HI
                (match_operand:V8HI 1 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 4)])))
            (zero_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 5)]))))
          (plus:V2DI
            (zero_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 2) (const_int 6)])))
            (zero_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 3) (const_int 7)]))))))]
  "TARGET_XOP"
  "vphadduwq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
11008
 
11009
;; vphaddudq: elements (0,2) and elements (1,3) of the V4SI operand 1 are
;; each zero-extended to V2DI and added, so result element i is
;; dword 2i + dword 2i+1.
(define_insn "xop_phaddudq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)])))))]
  "TARGET_XOP"
  "vphaddudq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
11025
 
11026
;; vphsubbw: the even-indexed and the odd-indexed bytes of the V16QI
;; operand 1 are each sign-extended to V8HI; the odd selection is
;; subtracted from the even one, so result element i is
;; byte 2i - byte 2i+1.
(define_insn "xop_phsubbw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (minus:V8HI
          (sign_extend:V8HI
            (vec_select:V8QI
              (match_operand:V16QI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (sign_extend:V8HI
            (vec_select:V8QI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)
                         (const_int 9) (const_int 11)
                         (const_int 13) (const_int 15)])))))]
  "TARGET_XOP"
  "vphsubbw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
11054
 
11055
;; Horizontal subtract of signed words.  Every odd-indexed word of the
;; V8HI source is subtracted from the even-indexed word preceding it,
;; after both have been sign-extended to SImode.
(define_insn "xop_phsubwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (minus:V4SI
          (sign_extend:V4SI
            (vec_select:V4HI
              (match_operand:V8HI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (sign_extend:V4SI
            (vec_select:V4HI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)])))))]
  "TARGET_XOP"
  "vphsubwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
11075
 
11076
;; Horizontal subtract of signed doublewords.  Elements 1 and 3 of the
;; V4SI source are subtracted from elements 0 and 2 respectively, after
;; both sides have been sign-extended to DImode.
(define_insn "xop_phsubdq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (minus:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)])))))]
  "TARGET_XOP"
  "vphsubdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
11092
 
11093
;; XOP permute instructions
;; vpperm is represented as an opaque unspec over three V16QI inputs.
;; Operand 1 must be a register; at most one of operands 2 and 3 may be
;; a memory reference, which both the constraint alternatives and the
;; insn condition enforce.
(define_insn "xop_pperm"
  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
        (unspec:V16QI
          [(match_operand:V16QI 1 "register_operand" "x,x")
           (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
           (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
          UNSPEC_XOP_PERMUTE))]
  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")
   (set_attr "mode" "TI")])
11105
 
11106
;; XOP pack instructions that combine two vectors into a smaller vector
;; V2DI + V2DI -> V4SI: each input is truncated element-wise to V2SI and
;; the two halves are concatenated.  Operand 3 is handed to vpperm as its
;; last source and appears in the RTL only as a `use'; presumably it is
;; the selector vector that implements the truncation (not visible here).
(define_insn "xop_pperm_pack_v2di_v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
        (vec_concat:V4SI
          (truncate:V2SI (match_operand:V2DI 1 "register_operand" "x,x"))
          (truncate:V2SI (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")
   (set_attr "mode" "TI")])
11119
 
11120
;; V4SI + V4SI -> V8HI: each input is truncated element-wise to V4HI and
;; the two halves are concatenated.  Operand 3 is only `use'd and is
;; passed to vpperm as its last source.
(define_insn "xop_pperm_pack_v4si_v8hi"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_concat:V8HI
          (truncate:V4HI (match_operand:V4SI 1 "register_operand" "x,x"))
          (truncate:V4HI (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")
   (set_attr "mode" "TI")])
11132
 
11133
;; V8HI + V8HI -> V16QI: each input is truncated element-wise to V8QI and
;; the two halves are concatenated.  Operand 3 is only `use'd and is
;; passed to vpperm as its last source.
(define_insn "xop_pperm_pack_v8hi_v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
        (vec_concat:V16QI
          (truncate:V8QI (match_operand:V8HI 1 "register_operand" "x,x"))
          (truncate:V8QI (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")
   (set_attr "mode" "TI")])
11145
 
11146
;; XOP packed rotate instructions
;; Rotate-left expander for every SSEMODE1248 mode.  When operand 2 is a
;; constant accepted by the per-mode immediate predicate, nothing is
;; emitted here and the (rotate ...) template above is generated as is.
;; Otherwise the scalar count is broadcast to every lane of a fresh
;; vector register and the variable-count pattern is used instead.
(define_expand "rotl<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "")
        (rotate:SSEMODE1248
         (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
         (match_operand:SI 2 "general_operand")))]
  "TARGET_XOP"
{
  /* If we were given a scalar, convert it to parallel */
  if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
    {
      rtvec vs = rtvec_alloc (<ssescalarnum>);
      rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
      rtx reg = gen_reg_rtx (<MODE>mode);
      rtx op2 = operands[2];
      int i;

      /* The count arrives in SImode; bring it to the element mode so it
         can be used as a vector element.  */
      if (GET_MODE (op2) != <ssescalarmode>mode)
        {
          op2 = gen_reg_rtx (<ssescalarmode>mode);
          convert_move (op2, operands[2], false);
        }

      /* Same count in every lane.  */
      for (i = 0; i < <ssescalarnum>; i++)
        RTVEC_ELT (vs, i) = op2;

      emit_insn (gen_vec_init<mode> (reg, par));
      emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
      DONE;
    }
})
11177
 
11178
;; Rotate-right expander for every SSEMODE1248 mode.  An in-range
;; constant count leaves the (rotatert ...) template to be emitted as is.
;; Any other count is broadcast to all lanes, negated, and fed to the
;; variable rotate-left pattern, which rotates right for negative counts.
(define_expand "rotr<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "")
        (rotatert:SSEMODE1248
         (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
         (match_operand:SI 2 "general_operand")))]
  "TARGET_XOP"
{
  /* If we were given a scalar, convert it to parallel */
  if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
    {
      rtvec vs = rtvec_alloc (<ssescalarnum>);
      rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
      rtx neg = gen_reg_rtx (<MODE>mode);
      rtx reg = gen_reg_rtx (<MODE>mode);
      rtx op2 = operands[2];
      int i;

      /* The count arrives in SImode; bring it to the element mode so it
         can be used as a vector element.  */
      if (GET_MODE (op2) != <ssescalarmode>mode)
        {
          op2 = gen_reg_rtx (<ssescalarmode>mode);
          convert_move (op2, operands[2], false);
        }

      /* Same count in every lane.  */
      for (i = 0; i < <ssescalarnum>; i++)
        RTVEC_ELT (vs, i) = op2;

      emit_insn (gen_vec_init<mode> (reg, par));
      /* Right rotate == left rotate by the negated count.  */
      emit_insn (gen_neg<mode>2 (neg, reg));
      emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
      DONE;
    }
})
11210
 
11211
;; Rotate left by an immediate.  The predicate on operand 2 limits the
;; count to the range valid for the element size of the mode.
(define_insn "xop_rotl<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
        (rotate:SSEMODE1248
         (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
         (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
  "TARGET_XOP"
  "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
11221
 
11222
;; Rotate right by an immediate, emitted as a rotate left by
;; (element width in bits - count).
(define_insn "xop_rotr<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
        (rotatert:SSEMODE1248
         (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
         (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
  "TARGET_XOP"
{
  /* The complement must be taken against the width of ONE element.
     Deriving it from the lane count times 8 is only right for V4SI
     (4 * 8 == 32); for V2DI it yields 16 instead of 64.  */
  operands[3]
    = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
  return \"vprot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
}
  [(set_attr "type" "sseishft")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
11235
 
11236
;; Per-element variable rotate right: negate the count vector and reuse
;; the variable rotate-left pattern.
(define_expand "vrotr<mode>3"
  [(match_operand:SSEMODE1248 0 "register_operand" "")
   (match_operand:SSEMODE1248 1 "register_operand" "")
   (match_operand:SSEMODE1248 2 "register_operand" "")]
  "TARGET_XOP"
{
  rtx reg = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neg<mode>2 (reg, operands[2]));
  emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
  DONE;
})
11247
 
11248
;; Per-element variable rotate left: forwards directly to the XOP
;; variable rotate pattern.
(define_expand "vrotl<mode>3"
  [(match_operand:SSEMODE1248 0 "register_operand" "")
   (match_operand:SSEMODE1248 1 "register_operand" "")
   (match_operand:SSEMODE1248 2 "register_operand" "")]
  "TARGET_XOP"
{
  emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
  DONE;
})
11257
 
11258
;; Variable rotate with a signed per-element count in operand 2:
;; count >= 0 rotates operand 1 left by count, count < 0 rotates it
;; right by -count.  Only one of operands 1 and 2 may be in memory.
(define_insn "xop_vrotl<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
        (if_then_else:SSEMODE1248
         (ge:SSEMODE1248
          (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
          (const_int 0))
         (rotate:SSEMODE1248
          (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
          (match_dup 2))
         (rotatert:SSEMODE1248
          (match_dup 1)
          (neg:SSEMODE1248 (match_dup 2)))))]
  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "mode" "TI")])
11276
 
11277
;; XOP packed shift instructions.
;; FIXME: add V2DI back in
;; Per-element logical shift right: negate the count vector and use the
;; XOP logical shift pattern, which shifts right for negative counts.
(define_expand "vlshr<mode>3"
  [(match_operand:SSEMODE124 0 "register_operand" "")
   (match_operand:SSEMODE124 1 "register_operand" "")
   (match_operand:SSEMODE124 2 "register_operand" "")]
  "TARGET_XOP"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
  DONE;
})
11290
 
11291
;; Per-element arithmetic shift right: negate the count vector and use
;; the XOP arithmetic shift pattern, which shifts right for negative
;; counts.
(define_expand "vashr<mode>3"
  [(match_operand:SSEMODE124 0 "register_operand" "")
   (match_operand:SSEMODE124 1 "register_operand" "")
   (match_operand:SSEMODE124 2 "register_operand" "")]
  "TARGET_XOP"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
  DONE;
})
11302
 
11303
;; Per-element shift left: forwards directly to the XOP arithmetic shift
;; pattern with the count vector unchanged.
(define_expand "vashl<mode>3"
  [(match_operand:SSEMODE124 0 "register_operand" "")
   (match_operand:SSEMODE124 1 "register_operand" "")
   (match_operand:SSEMODE124 2 "register_operand" "")]
  "TARGET_XOP"
{
  emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
  DONE;
})
11312
 
11313
;; Variable arithmetic shift with a signed per-element count in
;; operand 2: count >= 0 shifts operand 1 left by count, count < 0
;; shifts it arithmetically right by -count.  Only one of operands 1 and
;; 2 may be in memory.
(define_insn "xop_ashl<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
        (if_then_else:SSEMODE1248
         (ge:SSEMODE1248
          (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
          (const_int 0))
         (ashift:SSEMODE1248
          (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
          (match_dup 2))
         (ashiftrt:SSEMODE1248
          (match_dup 1)
          (neg:SSEMODE1248 (match_dup 2)))))]
  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "mode" "TI")])
11331
 
11332
;; Variable logical shift with a signed per-element count in operand 2:
;; count >= 0 shifts operand 1 left by count, count < 0 shifts it
;; logically right by -count.  Only one of operands 1 and 2 may be in
;; memory.
(define_insn "xop_lshl<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
        (if_then_else:SSEMODE1248
         (ge:SSEMODE1248
          (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
          (const_int 0))
         (ashift:SSEMODE1248
          (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
          (match_dup 2))
         (lshiftrt:SSEMODE1248
          (match_dup 1)
          (neg:SSEMODE1248 (match_dup 2)))))]
  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "mode" "TI")])
11350
 
11351
;; SSE2 doesn't have some shift variants, so define versions for XOP
11352
;; Shift every byte of a V16QI vector left by one scalar count: the
;; count is replicated into all 16 lanes of a new vector register, which
;; is then passed to the XOP variable arithmetic shift pattern.
(define_expand "ashlv16qi3"
  [(match_operand:V16QI 0 "register_operand" "")
   (match_operand:V16QI 1 "register_operand" "")
   (match_operand:SI 2 "nonmemory_operand" "")]
  "TARGET_XOP"
{
  rtvec elts = rtvec_alloc (16);
  rtx init = gen_rtx_PARALLEL (V16QImode, elts);
  rtx counts = gen_reg_rtx (V16QImode);
  int lane;

  /* Same count in every lane.  */
  for (lane = 0; lane < 16; lane++)
    RTVEC_ELT (elts, lane) = operands[2];

  emit_insn (gen_vec_initv16qi (counts, init));
  emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], counts));
  DONE;
})
11369
 
11370
;; Same construction as the arithmetic V16QI left shift, but the
;; broadcast count vector is passed to the XOP variable logical shift
;; pattern.
(define_expand "lshlv16qi3"
  [(match_operand:V16QI 0 "register_operand" "")
   (match_operand:V16QI 1 "register_operand" "")
   (match_operand:SI 2 "nonmemory_operand" "")]
  "TARGET_XOP"
{
  rtvec elts = rtvec_alloc (16);
  rtx init = gen_rtx_PARALLEL (V16QImode, elts);
  rtx counts = gen_reg_rtx (V16QImode);
  int lane;

  /* Same count in every lane.  */
  for (lane = 0; lane < 16; lane++)
    RTVEC_ELT (elts, lane) = operands[2];

  emit_insn (gen_vec_initv16qi (counts, init));
  emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], counts));
  DONE;
})
11387
 
11388
;; Arithmetic right shift of every byte of a V16QI vector by one scalar
;; count, done as an XOP arithmetic shift by the negated count.  A
;; constant count is negated at expand time; a non-constant count is
;; broadcast first and the resulting vector is negated with an insn.
(define_expand "ashrv16qi3"
  [(match_operand:V16QI 0 "register_operand" "")
   (match_operand:V16QI 1 "register_operand" "")
   (match_operand:SI 2 "nonmemory_operand" "")]
  "TARGET_XOP"
{
  rtvec elts = rtvec_alloc (16);
  rtx init = gen_rtx_PARALLEL (V16QImode, elts);
  rtx counts = gen_reg_rtx (V16QImode);
  int count_is_const = CONST_INT_P (operands[2]);
  rtx elt;
  int lane;

  if (count_is_const)
    elt = GEN_INT (- INTVAL (operands[2]));
  else
    elt = operands[2];

  /* Same element in every lane.  */
  for (lane = 0; lane < 16; lane++)
    RTVEC_ELT (elts, lane) = elt;

  emit_insn (gen_vec_initv16qi (counts, init));

  if (count_is_const)
    /* COUNTS already holds the negated value.  */
    emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], counts));
  else
    {
      rtx negated = gen_reg_rtx (V16QImode);
      emit_insn (gen_negv16qi2 (negated, counts));
      emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], negated));
    }

  DONE;
})
11418
 
11419
;; Arithmetic right shift of both V2DI elements by one scalar count,
;; done as an XOP arithmetic shift by the negated count.  The count is
;; negated as a constant when possible, otherwise with negdi2 (after
;; conversion to DImode if it has another mode), then placed in both
;; lanes of the count vector.
(define_expand "ashrv2di3"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V2DI 1 "register_operand" "")
   (match_operand:DI 2 "nonmemory_operand" "")]
  "TARGET_XOP"
{
  rtvec elts = rtvec_alloc (2);
  rtx init = gen_rtx_PARALLEL (V2DImode, elts);
  rtx counts = gen_reg_rtx (V2DImode);
  rtx negated;

  if (CONST_INT_P (operands[2]))
    negated = GEN_INT (- INTVAL (operands[2]));
  else if (GET_MODE (operands[2]) == DImode)
    {
      negated = gen_reg_rtx (DImode);
      emit_insn (gen_negdi2 (negated, operands[2]));
    }
  else
    {
      /* Widen/convert the count to DImode before negating it.  */
      rtx widened = gen_reg_rtx (DImode);
      negated = gen_reg_rtx (DImode);
      convert_move (widened, operands[2], false);
      emit_insn (gen_negdi2 (negated, widened));
    }

  RTVEC_ELT (elts, 0) = negated;
  RTVEC_ELT (elts, 1) = negated;
  emit_insn (gen_vec_initv2di (counts, init));
  emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], counts));
  DONE;
})
11451
 
11452
;; XOP FRCZ support
;; parallel insns
;; Packed form: the operation is an opaque UNSPEC_FRCZ applied to the
;; whole vector in operand 1.
(define_insn "xop_frcz<mode>2"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
        (unspec:SSEMODEF2P
         [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
         UNSPEC_FRCZ))]
  "TARGET_XOP"
  "vfrcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt1")
   (set_attr "mode" "<MODE>")])
11463
 
11464
;; scalar insns
;; Scalar form: UNSPEC_FRCZ of operand 2 is merged into element 0
;; (vec_merge mask 1); the remaining elements come from operand 1, which
;; is tied to the destination.
(define_insn "xop_vmfrcz<mode>2"
  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
        (vec_merge:SSEMODEF2P
          (unspec:SSEMODEF2P
           [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
           UNSPEC_FRCZ)
          (match_operand:SSEMODEF2P 1 "register_operand" "0")
          (const_int 1)))]
  "TARGET_XOP"
  "vfrcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecvt1")
   (set_attr "mode" "<MODE>")])
11477
 
11478
;; Packed UNSPEC_FRCZ for the modes of the FMA4MODEF4 iterator (the
;; "256" name suffix indicates the 256-bit variants).
(define_insn "xop_frcz<mode>2256"
  [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x")
        (unspec:FMA4MODEF4
         [(match_operand:FMA4MODEF4 1 "nonimmediate_operand" "xm")]
         UNSPEC_FRCZ))]
  "TARGET_XOP"
  "vfrcz<fma4modesuffixf4>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt1")
   (set_attr "mode" "<MODE>")])
11487
 
11488
;; Signed integer vector compare.  Operator 1 (any code accepted by
;; ix86_comparison_int_operator) selects the condition, which is printed
;; into the mnemonic through %Y1.
(define_insn "xop_maskcmp<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
        (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
         [(match_operand:SSEMODE1248 2 "register_operand" "x")
          (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
  "TARGET_XOP"
  "vpcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sse4arg")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
11501
 
11502
;; Unsigned integer vector compare.  Operator 1 (any code accepted by
;; ix86_comparison_uns_operator) selects the condition; the mnemonic
;; carries a "u" between the condition and the element-size suffix.
(define_insn "xop_maskcmp_uns<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
        (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
         [(match_operand:SSEMODE1248 2 "register_operand" "x")
          (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
  "TARGET_XOP"
  "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
11515
 
11516
;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
;; and pcomneu* not to be converted to the signed ones in case somebody needs
;; the exact instruction generated for the intrinsic.
;; The comparison is wrapped in UNSPEC_XOP_UNSIGNED_CMP so that it stays
;; opaque to the RTL optimizers.
(define_insn "xop_maskcmp_uns2<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
        (unspec:SSEMODE1248
         [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
          [(match_operand:SSEMODE1248 2 "register_operand" "x")
           (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
         UNSPEC_XOP_UNSIGNED_CMP))]
  "TARGET_XOP"
  "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
11533
 
11534
;; Pcomtrue and pcomfalse support.  These are useless instructions, but are
;; being added here to be complete.
;; Operand 3 is a constant selector: nonzero emits vpcomtrue, zero emits
;; vpcomfalse.
(define_insn "xop_pcom_tf<mode>3"
  [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
        (unspec:SSEMODE1248
          [(match_operand:SSEMODE1248 1 "register_operand" "x")
           (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_int_operand" "n")]
          UNSPEC_XOP_TRUEFALSE))]
  "TARGET_XOP"
{
  return ((INTVAL (operands[3]) != 0)
          ? "vpcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
          : "vpcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
}
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
11554
 
11555
;; Two-source permute, kept as an opaque UNSPEC_VPERMIL2.  Operands 1
;; and 2 are the data vectors, operand 3 is a vector in the matching
;; integer mode, and operand 4 is an immediate in the range 0..3.
(define_insn "xop_vpermil2<mode>3"
  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
        (unspec:AVXMODEF2P
          [(match_operand:AVXMODEF2P 1 "register_operand" "x")
           (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "%x")
           (match_operand:<avxpermvecmode> 3 "nonimmediate_operand" "xm")
           (match_operand:SI 4 "const_0_to_3_operand" "n")]
          UNSPEC_VPERMIL2))]
  "TARGET_XOP"
  "vpermil2p<avxmodesuffixf2c>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
  [(set_attr "type" "sse4arg")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "<MODE>")])
11568
 
11569
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11570
;; AES encryption round, VEX three-operand form: the destination is
;; independent of both sources.  Requires both AES and AVX.
(define_insn "*avx_aesenc"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "x")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
          UNSPEC_AESENC))]
  "TARGET_AES && TARGET_AVX"
  "vaesenc\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
11581
 
11582
;; AES encryption round, two-operand form: operand 1 is tied to the
;; destination (constraint "0").
(define_insn "aesenc"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
          UNSPEC_AESENC))]
  "TARGET_AES"
  "aesenc\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
11592
 
11593
;; Last AES encryption round, VEX three-operand form.  Requires both AES
;; and AVX.
(define_insn "*avx_aesenclast"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "x")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
          UNSPEC_AESENCLAST))]
  "TARGET_AES && TARGET_AVX"
  "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
11604
 
11605
;; Last AES encryption round, two-operand form: operand 1 is tied to the
;; destination (constraint "0").
(define_insn "aesenclast"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
          UNSPEC_AESENCLAST))]
  "TARGET_AES"
  "aesenclast\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
11615
 
11616
;; AES decryption round, VEX three-operand form.  Requires both AES and
;; AVX.
(define_insn "*avx_aesdec"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "x")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
          UNSPEC_AESDEC))]
  "TARGET_AES && TARGET_AVX"
  "vaesdec\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
11627
 
11628
;; AES decryption round, two-operand form: operand 1 is tied to the
;; destination (constraint "0").
(define_insn "aesdec"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
          UNSPEC_AESDEC))]
  "TARGET_AES"
  "aesdec\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
11638
 
11639
;; Last AES decryption round, VEX three-operand form.  Requires both AES
;; and AVX.
(define_insn "*avx_aesdeclast"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "x")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
          UNSPEC_AESDECLAST))]
  "TARGET_AES && TARGET_AVX"
  "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
11650
 
11651
;; Last AES decryption round, two-operand form: operand 1 is tied to the
;; destination (constraint "0").
(define_insn "aesdeclast"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
          UNSPEC_AESDECLAST))]
  "TARGET_AES"
  "aesdeclast\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
11661
 
11662
;; AES inverse-mix-columns, a unary unspec.  One pattern covers both
;; encodings: note the `%v' mnemonic prefix in the template and the
;; "maybe_vex" prefix attribute.
(define_insn "aesimc"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
          UNSPEC_AESIMC))]
  "TARGET_AES"
  "%vaesimc\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
11672
 
11673
;; AES key-generation assist.  Operand 2 is an 8-bit immediate (0..255).
;; One pattern covers both encodings via `%v' / "maybe_vex".
(define_insn "aeskeygenassist"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
           (match_operand:SI 2 "const_0_to_255_operand" "n")]
          UNSPEC_AESKEYGENASSIST))]
  "TARGET_AES"
  "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
11685
 
11686
;; PCLMULQDQ, VEX three-operand form with an 8-bit immediate in
;; operand 3.  Requires both PCLMUL and AVX.
(define_insn "*vpclmulqdq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "x")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_PCLMUL))]
  "TARGET_PCLMUL && TARGET_AVX"
  "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI")])
11699
 
11700
;; PCLMULQDQ, two-operand form: operand 1 is tied to the destination
;; (constraint "0"); operand 3 is an 8-bit immediate.
(define_insn "pclmulqdq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_PCLMUL))]
  "TARGET_PCLMUL"
  "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
11712
 
11713
;; Build the PARALLEL for vzeroall: element 0 is the UNSPECV_VZEROALL
;; marker, followed by one (set reg 0) in V8SImode for every SSE register
;; (16 of them when TARGET_64BIT, otherwise 8).
(define_expand "avx_vzeroall"
  [(match_par_dup 0 [(const_int 0)])]
  "TARGET_AVX"
{
  int count = TARGET_64BIT ? 16 : 8;
  int i;

  operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count + 1));

  XVECEXP (operands[0], 0, 0)
    = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
                               UNSPECV_VZEROALL);

  /* Slot 0 holds the marker, so register I lands in slot I + 1.  */
  for (i = 0; i < count; i++)
    XVECEXP (operands[0], 0, i + 1)
      = gen_rtx_SET (VOIDmode,
                     gen_rtx_REG (V8SImode, SSE_REGNO (i)),
                     CONST0_RTX (V8SImode));
})
11732
 
11733
;; Matches a PARALLEL accepted by vzeroall_operation whose first element
;; is the UNSPECV_VZEROALL marker, and emits the instruction.
(define_insn "*avx_vzeroall"
  [(match_parallel 0 "vzeroall_operation"
     [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
  "TARGET_AVX"
  "vzeroall"
  [(set_attr "type" "sse")
   (set_attr "modrm" "0")
   (set_attr "memory" "none")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
11743
 
11744
;; vzeroupper clobbers the upper 128bits of AVX registers.
;; The expander builds a PARALLEL whose element 0 is the
;; UNSPECV_VZEROUPPER marker, followed by a V8SImode clobber of every SSE
;; register (16 of them when TARGET_64BIT, otherwise 8).
(define_expand "avx_vzeroupper"
  [(match_par_dup 0 [(const_int 0)])]
  "TARGET_AVX"
{
  int count = TARGET_64BIT ? 16 : 8;
  int i;

  operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count + 1));

  XVECEXP (operands[0], 0, 0)
    = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
                               UNSPECV_VZEROUPPER);

  /* Slot 0 holds the marker, so register I lands in slot I + 1.  */
  for (i = 0; i < count; i++)
    XVECEXP (operands[0], 0, i + 1)
      = gen_rtx_CLOBBER (VOIDmode,
                         gen_rtx_REG (V8SImode, SSE_REGNO (i)));
})
11763
 
11764
;; Matches a PARALLEL accepted by vzeroupper_operation whose first
;; element is the UNSPECV_VZEROUPPER marker, and emits the instruction.
(define_insn "*avx_vzeroupper"
  [(match_parallel 0 "vzeroupper_operation"
     [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
  "TARGET_AVX"
  "vzeroupper"
  [(set_attr "type" "sse")
   (set_attr "modrm" "0")
   (set_attr "memory" "none")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
11774
 
11775
;; Broadcast a scalar into every element of a 256-bit vector.  A memory
;; source is handled by a single vbroadcasts{s,d}.  A register source is
;; split after reload into a duplicate into the low half-width register
;; followed by a concatenation of that half with itself.
(define_insn_and_split "vec_dup<mode>"
  [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x")
        (vec_duplicate:AVX256MODE24P
          (match_operand:<avxscalarmode> 1 "nonimmediate_operand" "m,?x")))]
  "TARGET_AVX"
  "@
   vbroadcasts<avxmodesuffixf2c>\t{%1, %0|%0, %1}
   #"
  "&& reload_completed && REG_P (operands[1])"
  [(set (match_dup 2) (vec_duplicate:<avxhalfvecmode> (match_dup 1)))
   (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
{
  /* Operand 2 is the destination hard register viewed in the half-width
     vector mode.  */
  operands[2] = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[0]));
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
11793
 
11794
;; Duplicate a 128-bit vector into both halves of a 256-bit register.
;; Alternative 0: memory source, vbroadcastf128.
;; Alternative 1: source already in the destination register, vinsertf128
;;                copies it into the upper half.
;; Alternative 2: source in another register, vperm2f128 with selector 0.
(define_insn "avx_vbroadcastf128_<mode>"
  [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
        (vec_concat:AVX256MODE
          (match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
          (match_dup 1)))]
  "TARGET_AVX"
  "@
   vbroadcastf128\t{%1, %0|%0, %1}
   vinsertf128\t{$1, %1, %0, %0|%0, %0, %1, 1}
   vperm2f128\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
  [(set_attr "type" "ssemov,sselog1,sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "0,1,1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF,V8SF,V8SF")])
11809
 
11810
;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
;; If it so happens that the input is in memory, use vbroadcast.
;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF variant: operand 3 is the index of the element being broadcast.
(define_insn "*avx_vperm_broadcast_v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
        (vec_select:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
          (match_parallel 2 "avx_vbroadcast_operand"
            [(match_operand 3 "const_int_operand" "C,n,n")])))]
  "TARGET_AVX"
{
  int lane = INTVAL (operands[3]);

  if (which_alternative == 0 || which_alternative == 1)
    {
      /* Memory source: address the selected float directly (4 bytes per
         element) and broadcast it.  */
      operands[1] = adjust_address_nv (operands[1], SFmode, lane * 4);
      return "vbroadcastss\t{%1, %0|%0, %1}";
    }

  if (which_alternative == 2)
    {
      /* Register source: multiplying by 0x55 copies the 2-bit index into
         all four 2-bit fields of the immediate.  */
      operands[2] = GEN_INT (lane * 0x55);
      return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
    }

  gcc_unreachable ();
}
  [(set_attr "type" "ssemov,ssemov,sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "0,0,1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "SF,SF,V4SF")])
11840
 
11841
;; 256-bit broadcast expressed as a vec_select; always split after
;; reload.  A register source becomes an in-lane vpermil followed by a
;; vperm2f128 that copies the chosen lane into both halves.  A memory
;; source is re-addressed to the selected scalar, so the replacement
;; vec_duplicate template below is emitted with that scalar operand.
(define_insn_and_split "*avx_vperm_broadcast_<mode>"
  [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x,x,x")
        (vec_select:AVX256MODEF2P
          (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "m,o,?x")
          (match_parallel 2 "avx_vbroadcast_operand"
            [(match_operand 3 "const_int_operand" "C,n,n")])))]
  "TARGET_AVX"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (vec_duplicate:AVX256MODEF2P (match_dup 1)))]
{
  rtx op0 = operands[0], op1 = operands[1];
  int elt = INTVAL (operands[3]);

  if (REG_P (op1))
    {
      int mask;

      /* Shuffle element we care about into all elements of the 128-bit lane.
         The other lane gets shuffled too, but we don't care.  */
      if (<MODE>mode == V4DFmode)
        mask = (elt & 1 ? 15 : 0);
      else
        mask = (elt & 3) * 0x55;
      emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));

      /* Shuffle the lane we care about into both lanes of the dest.
         Half the element count is the number of elements per 128-bit
         lane, so the quotient is the lane index (0 or 1).  */
      mask = (elt / (GET_MODE_NUNITS (<MODE>mode) / 2)) * 0x11;
      emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
      DONE;
    }

  /* Memory source: point operand 1 at the selected scalar.  */
  operands[1] = adjust_address_nv (op1, <avxscalarmode>mode,
                                   elt * GET_MODE_SIZE (<avxscalarmode>mode));
})
11876
 
11877
;; Expander for the immediate form of vpermilpd (double-precision modes).
;; The 8-bit immediate in operand 2 is rewritten into an explicit
;; vec_select PARALLEL: one mask bit per element picks between the two
;; elements of that element's 128-bit lane.
(define_expand "avx_vpermil<mode>"
  [(set (match_operand:AVXMODEFDP 0 "register_operand" "")
        (vec_select:AVXMODEFDP
          (match_operand:AVXMODEFDP 1 "nonimmediate_operand" "")
          (match_operand:SI 2 "const_0_to_255_operand" "")))]
  "TARGET_AVX"
{
  int mask = INTVAL (operands[2]);
  rtx perm[<ssescalarnum>];

  /* Low lane: selector values are 0 or 1.  */
  perm[0] = GEN_INT (mask & 1);
  perm[1] = GEN_INT ((mask >> 1) & 1);
  if (<MODE>mode == V4DFmode)
    {
      /* High lane: same choice, offset by 2 to index elements 2 and 3.  */
      perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
      perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
    }

  operands[2]
    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
})
11898
 
11899
;; Expander for the immediate form of vpermilps (single-precision modes).
;; The 8-bit immediate in operand 2 holds four 2-bit element indices; it is
;; rewritten into an explicit vec_select PARALLEL.  For V8SF the same four
;; indices are reused for the upper lane, offset by 4.
(define_expand "avx_vpermil<mode>"
  [(set (match_operand:AVXMODEFSP 0 "register_operand" "")
        (vec_select:AVXMODEFSP
          (match_operand:AVXMODEFSP 1 "nonimmediate_operand" "")
          (match_operand:SI 2 "const_0_to_255_operand" "")))]
  "TARGET_AVX"
{
  int mask = INTVAL (operands[2]);
  rtx perm[<ssescalarnum>];

  /* Low lane: one 2-bit field of the immediate per destination element.  */
  perm[0] = GEN_INT (mask & 3);
  perm[1] = GEN_INT ((mask >> 2) & 3);
  perm[2] = GEN_INT ((mask >> 4) & 3);
  perm[3] = GEN_INT ((mask >> 6) & 3);
  if (<MODE>mode == V8SFmode)
    {
      /* High lane: identical fields, biased to elements 4..7.  */
      perm[4] = GEN_INT ((mask & 3) + 4);
      perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
      perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
      perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
    }

  operands[2]
    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
})
11924
 
11925
;; Matcher for vpermilps/vpermilpd with an immediate selector, where the
;; permutation is written as a vec_select PARALLEL accepted by the
;; per-mode avx_vpermilp_<mode>_operand predicate.  At output time the
;; PARALLEL is turned back into the instruction's immediate.
(define_insn "*avx_vpermilp<mode>"
  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
        (vec_select:AVXMODEF2P
          (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
          (match_parallel 2 "avx_vpermilp_<mode>_operand"
            [(match_operand 3 "const_int_operand" "")])))]
  "TARGET_AVX"
{
  /* NOTE(review): the "- 1" suggests avx_vpermilp_parallel returns the
     immediate plus one (0 meaning "no match") -- confirm in i386.c.  */
  int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
  operands[2] = GEN_INT (mask);
  return "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])
11942
 
11943
;; Variable-selector form of vpermilps/vpermilpd: operand 2 is an integer
;; vector (register or memory) supplying the control, so the operation is
;; kept as an opaque UNSPEC_VPERMIL rather than a vec_select.
(define_insn "avx_vpermilvar<mode>3"
  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
        (unspec:AVXMODEF2P
          [(match_operand:AVXMODEF2P 1 "register_operand" "x")
           (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
          UNSPEC_VPERMIL))]
  "TARGET_AVX"
  "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])
11955
 
11956
;; Expander for vperm2f128 with an immediate control (operand 3).
;; When neither zeroing bit (0x88) is set, the operation is emitted as a
;; vec_select from the vec_concat of operands 1 and 2, which the
;; *_nozero insn matches.  Otherwise the preparation code falls through
;; and the UNSPEC_VPERMIL2F128 template below is emitted unchanged.
(define_expand "avx_vperm2f128<mode>3"
  [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
        (unspec:AVX256MODE2P
          [(match_operand:AVX256MODE2P 1 "register_operand" "")
           (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
           (match_operand:SI 3 "const_0_to_255_operand" "")]
          UNSPEC_VPERMIL2F128))]
  "TARGET_AVX"
{
  int mask = INTVAL (operands[3]);
  if ((mask & 0x88) == 0)
    {
      rtx perm[<ssescalarnum>], t1, t2;
      int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;

      /* Bits 1:0 choose which of the four 128-bit source lanes (two per
         input) feeds the low half of the result.  */
      base = (mask & 3) * nelt2;
      for (i = 0; i < nelt2; ++i)
        perm[i] = GEN_INT (base + i);

      /* Bits 5:4 do the same for the high half of the result.  */
      base = ((mask >> 4) & 3) * nelt2;
      for (i = 0; i < nelt2; ++i)
        perm[i + nelt2] = GEN_INT (base + i);

      /* Build (set op0 (vec_select (vec_concat op1 op2) (parallel ...))).  */
      t2 = gen_rtx_VEC_CONCAT (<ssedoublesizemode>mode,
                               operands[1], operands[2]);
      t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
      t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
      t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
      emit_insn (t2);
      DONE;
    }
})
11988
 
11989
;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
;; means that in order to represent this properly in rtl we'd have to
;; nest *another* vec_concat with a zero operand and do the select from
;; a 4x wide vector.  That doesn't seem very nice.
;;
;; This pattern therefore keeps the general form (any imm8, including the
;; zeroing bits) as an opaque UNSPEC_VPERMIL2F128 and prints the immediate
;; exactly as given in operand 3.
(define_insn "*avx_vperm2f128<mode>_full"
  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
        (unspec:AVX256MODE2P
          [(match_operand:AVX256MODE2P 1 "register_operand" "x")
           (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_VPERMIL2F128))]
  "TARGET_AVX"
  "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
12007
 
12008
;; vperm2f128 without lane zeroing, represented as a vec_select from the
;; double-width vec_concat of operands 1 and 2.  The selector PARALLEL
;; (operand 3) must satisfy the per-mode avx_vperm2f128_<mode>_operand
;; predicate; it is converted back to the imm8 at output time.
(define_insn "*avx_vperm2f128<mode>_nozero"
  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
        (vec_select:AVX256MODE2P
          (vec_concat:<ssedoublesizemode>
            (match_operand:AVX256MODE2P 1 "register_operand" "x")
            (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
          (match_parallel 3 "avx_vperm2f128_<mode>_operand"
            [(match_operand 4 "const_int_operand" "")])))]
  "TARGET_AVX"
{
  /* NOTE(review): the "- 1" suggests avx_vperm2f128_parallel returns the
     immediate plus one (0 meaning "no match") -- confirm in i386.c.  */
  int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
  operands[3] = GEN_INT (mask);
  return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
12027
 
12028
;; Expander for vinsertf128: insert the 128-bit operand 2 into the half of
;; operand 1 chosen by operand 3 (0 = low, 1 = high), writing operand 0.
;; It only dispatches to the matching vec_set_lo_/vec_set_hi_ pattern.
(define_expand "avx_vinsertf128<mode>"
  [(match_operand:AVX256MODE 0 "register_operand" "")
   (match_operand:AVX256MODE 1 "register_operand" "")
   (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_0_to_1_operand" "")]
  "TARGET_AVX"
{
  switch (INTVAL (operands[3]))
    {
    case 0:
      emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
                                        operands[2]));
      break;
    case 1:
      emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
                                        operands[2]));
      break;
    default:
      /* The const_0_to_1_operand predicate admits nothing else.  */
      gcc_unreachable ();
    }
  DONE;
})
12050
 
12051
;; Four-element 256-bit modes: replace the low half of operand 1 with
;; operand 2.  The result is operand 2 concatenated with elements 2 and 3
;; (the high half) of operand 1.
(define_insn "vec_set_lo_<mode>"
  [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
        (vec_concat:AVX256MODE4P
          (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
          (vec_select:<avxhalfvecmode>
            (match_operand:AVX256MODE4P 1 "register_operand" "x")
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_AVX"
  "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
12065
 
12066
;; Four-element 256-bit modes: replace the high half of operand 1 with
;; operand 2.  The result is elements 0 and 1 (the low half) of operand 1
;; concatenated with operand 2.
(define_insn "vec_set_hi_<mode>"
  [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
        (vec_concat:AVX256MODE4P
          (vec_select:<avxhalfvecmode>
            (match_operand:AVX256MODE4P 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)]))
          (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
12080
 
12081
;; Eight-element 256-bit modes: replace the low half of operand 1 with
;; operand 2.  The result is operand 2 concatenated with elements 4..7
;; (the high half) of operand 1.
(define_insn "vec_set_lo_<mode>"
  [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
        (vec_concat:AVX256MODE8P
          (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
          (vec_select:<avxhalfvecmode>
            (match_operand:AVX256MODE8P 1 "register_operand" "x")
            (parallel [(const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_AVX"
  "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
12096
 
12097
;; Eight-element 256-bit modes: replace the high half of operand 1 with
;; operand 2.  The result is elements 0..3 (the low half) of operand 1
;; concatenated with operand 2.
(define_insn "vec_set_hi_<mode>"
  [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
        (vec_concat:AVX256MODE8P
          (vec_select:<avxhalfvecmode>
            (match_operand:AVX256MODE8P 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))
          (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
12112
 
12113
;; V16HI: replace the low half of operand 1 with the V8HI operand 2.
;; The result is operand 2 concatenated with elements 8..15 of operand 1.
(define_insn "vec_set_lo_v16hi"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_concat:V16HI
          (match_operand:V8HI 2 "nonimmediate_operand" "xm")
          (vec_select:V8HI
            (match_operand:V16HI 1 "register_operand" "x")
            (parallel [(const_int 8) (const_int 9)
                       (const_int 10) (const_int 11)
                       (const_int 12) (const_int 13)
                       (const_int 14) (const_int 15)]))))]
  "TARGET_AVX"
  "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
12130
 
12131
;; V16HI: replace the high half of operand 1 with the V8HI operand 2.
;; The result is elements 0..7 of operand 1 concatenated with operand 2.
(define_insn "vec_set_hi_v16hi"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_concat:V16HI
          (vec_select:V8HI
            (match_operand:V16HI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))
          (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
12148
 
12149
;; V32QI: replace the low half of operand 1 with the V16QI operand 2.
;; The result is operand 2 concatenated with elements 16..31 of operand 1.
(define_insn "vec_set_lo_v32qi"
  [(set (match_operand:V32QI 0 "register_operand" "=x")
        (vec_concat:V32QI
          (match_operand:V16QI 2 "nonimmediate_operand" "xm")
          (vec_select:V16QI
            (match_operand:V32QI 1 "register_operand" "x")
            (parallel [(const_int 16) (const_int 17)
                       (const_int 18) (const_int 19)
                       (const_int 20) (const_int 21)
                       (const_int 22) (const_int 23)
                       (const_int 24) (const_int 25)
                       (const_int 26) (const_int 27)
                       (const_int 28) (const_int 29)
                       (const_int 30) (const_int 31)]))))]
  "TARGET_AVX"
  "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
12170
 
12171
;; V32QI: replace the high half of operand 1 with the V16QI operand 2.
;; The result is elements 0..15 of operand 1 concatenated with operand 2.
(define_insn "vec_set_hi_v32qi"
  [(set (match_operand:V32QI 0 "register_operand" "=x")
        (vec_concat:V32QI
          (vec_select:V16QI
            (match_operand:V32QI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)
                       (const_int 8) (const_int 9)
                       (const_int 10) (const_int 11)
                       (const_int 12) (const_int 13)
                       (const_int 14) (const_int 15)]))
          (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
12192
 
12193
;; Masked vector load (vmaskmovps/vmaskmovpd, load form).
;;   operand 1: memory source
;;   operand 2: mask register
;; The destination also appears as an input of the unspec (match_dup 0).
(define_insn "avx_maskloadp<avxmodesuffixf2c><avxmodesuffix>"
  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
        (unspec:AVXMODEF2P
          [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
           (match_operand:AVXMODEF2P 2 "register_operand" "x")
           (match_dup 0)]
          UNSPEC_MASKLOAD))]
  "TARGET_AVX"
  "vmaskmovp<avxmodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])
12206
 
12207
;; Masked vector store (vmaskmovps/vmaskmovpd, store form).
;;   operand 0: memory destination
;;   operand 1: mask register
;;   operand 2: data register
;; The old memory contents appear as an input of the unspec (match_dup 0).
(define_insn "avx_maskstorep<avxmodesuffixf2c><avxmodesuffix>"
  [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
        (unspec:AVXMODEF2P
          [(match_operand:AVXMODEF2P 1 "register_operand" "x")
           (match_operand:AVXMODEF2P 2 "register_operand" "x")
           (match_dup 0)]
          UNSPEC_MASKSTORE))]
  "TARGET_AVX"
  "vmaskmovp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])
12220
 
12221
;; Cast a 128-bit vector to the corresponding 256-bit mode (UNSPEC_CAST).
;;   alternative 0: source tied to the destination register; no code is
;;                  emitted and the insn length is 0.
;;   alternative 1: a 128-bit move into the low half (%x0) of the
;;                  destination, with the mnemonic chosen by the "mode"
;;                  attribute.
(define_insn "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x,x")
        (unspec:AVX256MODE2P
          [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "0,xm")]
          UNSPEC_CAST))]
  "TARGET_AVX"
{
  switch (which_alternative)
    {
    case 0:
      return "";
    case 1:
      switch (get_attr_mode (insn))
        {
        case MODE_V8SF:
          return "vmovaps\t{%1, %x0|%x0, %1}";
        case MODE_V4DF:
          return "vmovapd\t{%1, %x0|%x0, %1}";
        case MODE_OI:
          return "vmovdqa\t{%1, %x0|%x0, %1}";
        default:
          break;
        }
      /* An unexpected mode falls through to gcc_unreachable below.  */
    default:
      break;
    }
  gcc_unreachable ();
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<avxvecmode>")
   (set (attr "length")
    (if_then_else (eq_attr "alternative" "0")
       (const_string "0")
       (const_string "*")))])
12256
 
12257
;; Cast a 256-bit vector to the corresponding 128-bit mode (UNSPEC_CAST).
;;   alternative 0: source tied to the destination register; no code is
;;                  emitted and the insn length is 0.
;;   alternative 1: a 128-bit move from the low half (%x1) of the source,
;;                  with the mnemonic chosen by the "mode" attribute.
(define_insn "avx_<avxmodesuffixp>_<avxmodesuffixp><avxmodesuffix>"
  [(set (match_operand:<avxhalfvecmode> 0 "register_operand" "=x,x")
        (unspec:<avxhalfvecmode>
          [(match_operand:AVX256MODE2P 1 "nonimmediate_operand" "0,xm")]
          UNSPEC_CAST))]
  "TARGET_AVX"
{
  switch (which_alternative)
    {
    case 0:
      return "";
    case 1:
      switch (get_attr_mode (insn))
        {
        case MODE_V8SF:
          return "vmovaps\t{%x1, %0|%0, %x1}";
        case MODE_V4DF:
          return "vmovapd\t{%x1, %0|%0, %x1}";
        case MODE_OI:
          return "vmovdqa\t{%x1, %0|%0, %x1}";
        default:
          break;
        }
      /* An unexpected mode falls through to gcc_unreachable below.  */
    default:
      break;
    }
  gcc_unreachable ();
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<avxvecmode>")
   (set (attr "length")
    (if_then_else (eq_attr "alternative" "0")
       (const_string "0")
       (const_string "*")))])
12292
 
12293
;; Vector initialisation for every 256-bit AVX mode.  All of the work is
;; delegated to ix86_expand_vector_init, called with operand 0 as the
;; target and operand 1 as the initialiser.
(define_expand "vec_init<mode>"
  [(match_operand:AVX256MODE 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_AVX"
{
  ix86_expand_vector_init (false, operands[0], operands[1]);
  DONE;
})
12301
 
12302
;; Build a 256-bit vector from two 128-bit halves.
;;   alternative 0: operand 2 is a register or memory; it is inserted into
;;                  the high half with vinsertf128, with operand 1 printed
;;                  as a 256-bit register (%t1) as the base.
;;   alternative 1: operand 2 matches constraint "C"; a single 128-bit
;;                  move of operand 1 into %x0 is emitted.
;;                  NOTE(review): this relies on "C" being the all-zeros
;;                  constant and on the VEX 128-bit move clearing the
;;                  upper half -- confirm against constraints.md.
(define_insn "*vec_concat<mode>_avx"
  [(set (match_operand:AVX256MODE 0 "register_operand"   "=x,x")
        (vec_concat:AVX256MODE
          (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
          (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
  "TARGET_AVX"
{
  switch (which_alternative)
    {
    case 0:
      return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
    case 1:
      switch (get_attr_mode (insn))
        {
        case MODE_V8SF:
          return "vmovaps\t{%1, %x0|%x0, %1}";
        case MODE_V4DF:
          return "vmovapd\t{%1, %x0|%x0, %1}";
        default:
          return "vmovdqa\t{%1, %x0|%x0, %1}";
        }
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog,ssemov")
   (set_attr "prefix_extra" "1,*")
   (set_attr "length_immediate" "1,*")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<avxvecmode>")])

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.