OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-old/] [gcc-4.2.2/] [gcc/] [config/] [i386/] [sse.md] - Blame information for rev 825

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 38 julius
;; GCC machine description for SSE instructions
2
;; Copyright (C) 2005, 2006, 2007 Free Software Foundation, Inc.
3
;;
4
;; This file is part of GCC.
5
;;
6
;; GCC is free software; you can redistribute it and/or modify
7
;; it under the terms of the GNU General Public License as published by
8
;; the Free Software Foundation; either version 3, or (at your option)
9
;; any later version.
10
;;
11
;; GCC is distributed in the hope that it will be useful,
12
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
;; GNU General Public License for more details.
15
;;
16
;; You should have received a copy of the GNU General Public License
17
;; along with GCC; see the file COPYING3.  If not see
18
;; <http://www.gnu.org/licenses/>.
19
 
20
 
21
;; 16 byte integral modes handled by SSE, minus TImode, which gets
22
;; special-cased for TARGET_64BIT.
23
(define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])

;; Every 16-byte vector mode handled by SSE, integer and floating point.
(define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])

;; Subsets of the integer vector modes.  The digits in each name are the
;; element sizes, in bytes, of the modes the macro contains.
(define_mode_macro SSEMODE12  [V16QI V8HI])
(define_mode_macro SSEMODE24  [V8HI V4SI])
(define_mode_macro SSEMODE14  [V16QI V4SI])
(define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
(define_mode_macro SSEMODE248 [V8HI V4SI V2DI])

;; Integer vector mode -> element-size suffix used in mnemonics.
(define_mode_attr ssevecsize
  [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
37
 
38
;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
39
 
40
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
41
;;
42
;; Move patterns
43
;;
44
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
45
 
46
;; All of these patterns are enabled for SSE1 as well as SSE2.
47
;; This is essential for maintaining stable calling conventions.
48
 
49
;; Move expander, instantiated once per integer vector mode in SSEMODEI.
;; The "<mode>" in the name and "<MODE>" in the body are substituted by
;; the mode macro; without them the pattern name is not per-mode and the
;; C fragment refers to an undefined identifier.
;; All of the actual work is delegated to ix86_expand_vector_move.
(define_expand "mov<mode>"
  [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
        (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move (<MODE>mode, operands);
  DONE;
})
57
 
58
;; Move instruction for the integer vector modes in SSEMODEI.
;; Alternatives:
;;   0: load an SSE constant ("C") into a register
;;   1: register or memory -> register
;;   2: register -> memory
;; Memory-to-memory moves are rejected by the insn condition.
;; The "<mode>" in the name gives each instantiation its own name.
(define_insn "*mov<mode>_internal"
  [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
        (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand"  "C ,xm,x"))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
  switch (which_alternative)
    {
    case 0:
      return standard_sse_constant_opcode (insn, operands[1]);

    case 1:
    case 2:
      /* The "mode" attribute computed below picks the encoding.  */
      return (get_attr_mode (insn) == MODE_V4SF
              ? "movaps\t{%1, %0|%0, %1}"
              : "movdqa\t{%1, %0|%0, %1}");

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,ssemov,ssemov")
   ;; V4SF (movaps) when optimizing for size, when SSE2 is not enabled,
   ;; or for stores (alternative 2) when TARGET_SSE_TYPELESS_STORES is
   ;; set; TI (movdqa) otherwise.
   (set (attr "mode")
        (if_then_else
          (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
                    (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
               (and (eq_attr "alternative" "2")
                    (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
                        (const_int 0))))
          (const_string "V4SF")
          (const_string "TI")))])
87
 
88
;; V4SF move expander; hands both operands to ix86_expand_vector_move.
(define_expand "movv4sf"
  [(set
     (match_operand:V4SF 0 "nonimmediate_operand" "")
     (match_operand:V4SF 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move (V4SFmode, operands);
  DONE;
})
96
 
97
;; V4SF move instruction.
;; Alternatives:
;;   0: load an SSE constant ("C") into a register
;;   1: register or memory -> register (movaps)
;;   2: register -> memory (movaps)
;; As in the integer and V2DF move insns in this file, memory-to-memory
;; moves are excluded by the insn condition; no alternative can encode
;; them.
(define_insn "*movv4sf_internal"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
        (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
  switch (which_alternative)
    {
    case 0:
      return standard_sse_constant_opcode (insn, operands[1]);
    case 1:
    case 2:
      return "movaps\t{%1, %0|%0, %1}";
    default:
      /* Only three alternatives exist; use the same unreachable marker
         as the other move insns in this file.  */
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,ssemov,ssemov")
   (set_attr "mode" "V4SF")])
115
 
116
;; After reload, rewrite a V4SF load that matches
;; zero_extended_scalar_load_operand as a vec_merge: the SFmode subreg
;; of the source is duplicated and merged (mask 1, i.e. element 0 only)
;; into an all-zero V4SF vector.
(define_split
  [(set (match_operand:V4SF 0 "register_operand" "")
        (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
  "TARGET_SSE && reload_completed"
  [(set (match_dup 0)
        (vec_merge:V4SF (vec_duplicate:V4SF (match_dup 1))
                        (match_dup 2)
                        (const_int 1)))]
{
  /* Operand 2 is the zero vector supplying the remaining elements.  */
  operands[2] = CONST0_RTX (V4SFmode);
  /* Operand 1 becomes the SFmode piece at byte offset 0.  */
  operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
})
129
 
130
;; V2DF move expander; hands both operands to ix86_expand_vector_move.
(define_expand "movv2df"
  [(set
     (match_operand:V2DF 0 "nonimmediate_operand" "")
     (match_operand:V2DF 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move (V2DFmode, operands);
  DONE;
})
138
 
139
;; V2DF move instruction.
;; Alternatives:
;;   0: load an SSE constant ("C") into a register
;;   1: register or memory -> register
;;   2: register -> memory
;; Memory-to-memory moves are rejected by the insn condition.
(define_insn "*movv2df_internal"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
        (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
  switch (which_alternative)
    {
    case 0:
      return standard_sse_constant_opcode (insn, operands[1]);

    case 1:
    case 2:
      /* The "mode" attribute computed below picks the encoding.  */
      return (get_attr_mode (insn) == MODE_V4SF
              ? "movaps\t{%1, %0|%0, %1}"
              : "movapd\t{%1, %0|%0, %1}");

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,ssemov,ssemov")
   ;; V4SF (movaps) when optimizing for size, when SSE2 is not enabled,
   ;; or for stores (alternative 2) when TARGET_SSE_TYPELESS_STORES is
   ;; set; V2DF (movapd) otherwise.
   (set (attr "mode")
        (if_then_else
          (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
                    (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
               (and (eq_attr "alternative" "2")
                    (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
                        (const_int 0))))
          (const_string "V4SF")
          (const_string "V2DF")))])
168
 
169
;; After reload, rewrite a V2DF load that matches
;; zero_extended_scalar_load_operand as a vec_concat of the DFmode
;; subreg of the source with a DFmode zero.
(define_split
  [(set (match_operand:V2DF 0 "register_operand" "")
        (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0)
        (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
{
  /* Operand 2 is the zero that fills the second element.  */
  operands[2] = CONST0_RTX (DFmode);
  /* Operand 1 becomes the DFmode piece at byte offset 0.  */
  operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
})
178
 
179
;; Push expander, instantiated once per 16-byte vector mode in SSEMODE.
;; The "<mode>" in the name and "<MODE>" in the body are substituted by
;; the mode macro; without them the C fragment refers to an undefined
;; identifier and the pattern name is not per-mode.
;; The push itself is emitted by ix86_expand_push.
(define_expand "push<mode>1"
  [(match_operand:SSEMODE 0 "register_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_push (<MODE>mode, operands[0]);
  DONE;
})
186
 
187
;; Misaligned move expander, instantiated once per 16-byte vector mode
;; in SSEMODE.  The "<mode>" in the name and "<MODE>" in the body are
;; substituted by the mode macro; without them the C fragment refers to
;; an undefined identifier and the pattern name is not per-mode.
;; The work is delegated to ix86_expand_vector_move_misalign.
(define_expand "movmisalign<mode>"
  [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
        (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move_misalign (<MODE>mode, operands);
  DONE;
})
195
 
196
;; V4SF move wrapped in UNSPEC_MOVU; emits movups.
;; Alternatives: register/memory -> register, register -> memory.
;; Memory-to-memory is rejected by the insn condition.
;; The "mode" attribute is V4SF to match the operand mode and the
;; single-precision instruction (it previously said V2DF).
(define_insn "sse_movups"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
        (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
                     UNSPEC_MOVU))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "movups\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF")])
204
 
205
;; V2DF move wrapped in UNSPEC_MOVU; emits movupd.
;; Alternatives: register/memory -> register, register -> memory.
;; Memory-to-memory is rejected by the insn condition.
(define_insn "sse2_movupd"
  [(set
     (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
     (unspec:V2DF
       [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
       UNSPEC_MOVU))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "movupd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2DF")])
213
 
214
;; V16QI move wrapped in UNSPEC_MOVU; emits movdqu.
;; Alternatives: register/memory -> register, register -> memory.
;; Memory-to-memory is rejected by the insn condition.
(define_insn "sse2_movdqu"
  [(set
     (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
     (unspec:V16QI
       [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
       UNSPEC_MOVU))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "movdqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "TI")])
222
 
223
;; Store of a V4SF register to memory wrapped in UNSPEC_MOVNT; emits
;; movntps.
(define_insn "sse_movntv4sf"
  [(set
     (match_operand:V4SF 0 "memory_operand" "=m")
     (unspec:V4SF
       [(match_operand:V4SF 1 "register_operand" "x")]
       UNSPEC_MOVNT))]
  "TARGET_SSE"
  "movntps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF")])
231
 
232
;; Store of a V2DF register to memory wrapped in UNSPEC_MOVNT; emits
;; movntpd.
(define_insn "sse2_movntv2df"
  [(set
     (match_operand:V2DF 0 "memory_operand" "=m")
     (unspec:V2DF
       [(match_operand:V2DF 1 "register_operand" "x")]
       UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movntpd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])
240
 
241
;; Store of a V2DI register to memory wrapped in UNSPEC_MOVNT; emits
;; movntdq.
(define_insn "sse2_movntv2di"
  [(set
     (match_operand:V2DI 0 "memory_operand" "=m")
     (unspec:V2DI
       [(match_operand:V2DI 1 "register_operand" "x")]
       UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movntdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])
249
 
250
;; Store of an SImode general register ("r") to memory wrapped in
;; UNSPEC_MOVNT; emits movnti.
;; NOTE(review): the "mode" attribute is V2DF although both operands are
;; SImode -- confirm this is intended.
(define_insn "sse2_movntsi"
  [(set
     (match_operand:SI 0 "memory_operand" "=m")
     (unspec:SI
       [(match_operand:SI 1 "register_operand" "r")]
       UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movnti\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])
258
 
259
;; Load of a V16QI value from memory into a register wrapped in
;; UNSPEC_LDQQU; emits lddqu.
(define_insn "sse3_lddqu"
  [(set
     (match_operand:V16QI 0 "register_operand" "=x")
     (unspec:V16QI
       [(match_operand:V16QI 1 "memory_operand" "m")]
       UNSPEC_LDQQU))]
  "TARGET_SSE3"
  "lddqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])
267
 
268
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
269
;;
270
;; Parallel single-precision floating point arithmetic
271
;;
272
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
273
 
274
;; V4SF negation; expanded by ix86_expand_fp_absneg_operator.
(define_expand "negv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands);
  DONE;
})
279
 
280
;; V4SF absolute value; expanded by ix86_expand_fp_absneg_operator.
(define_expand "absv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands);
  DONE;
})
285
 
286
;; V4SF addition expander.  The operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy; the RTL template is then emitted.
(define_expand "addv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (plus:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "")
          (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);
})
292
 
293
;; Packed V4SF add (addps).  Operand 1 is marked commutative ("%") and
;; tied to the destination; operand 2 may be a register or memory.
(define_insn "*addv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (plus:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "%0")
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
  "addps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])
301
 
302
;; Scalar add on a V4SF register (addss).  The vec_merge mask of 1 takes
;; element 0 from the sum and the other elements from operand 1.
(define_insn "sse_vmaddv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (plus:V4SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
  "addss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "SF")])
313
 
314
;; V4SF subtraction expander.  The operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy; the RTL template is then emitted.
(define_expand "subv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (minus:V4SF
          (match_operand:V4SF 1 "register_operand" "")
          (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);
})
320
 
321
;; Packed V4SF subtract (subps).  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory.
(define_insn "*subv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (minus:V4SF
          (match_operand:V4SF 1 "register_operand" "0")
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "subps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])
329
 
330
;; Scalar subtract on a V4SF register (subss).  The vec_merge mask of 1
;; takes element 0 from the difference and the other elements from
;; operand 1.
(define_insn "sse_vmsubv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (minus:V4SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "subss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "SF")])
341
 
342
;; V4SF multiplication expander.  The operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy; the RTL template is then emitted.
(define_expand "mulv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (mult:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "")
          (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);
})
348
 
349
;; Packed V4SF multiply (mulps).  Operand 1 is marked commutative ("%")
;; and tied to the destination; operand 2 may be a register or memory.
(define_insn "*mulv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (mult:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "%0")
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
  "mulps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemul")
   (set_attr "mode" "V4SF")])
357
 
358
;; Scalar multiply on a V4SF register (mulss).  The vec_merge mask of 1
;; takes element 0 from the product and the other elements from
;; operand 1.
(define_insn "sse_vmmulv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (mult:V4SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
  "mulss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemul")
   (set_attr "mode" "SF")])
369
 
370
;; V4SF division expander.  The operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy; the RTL template is then emitted.
(define_expand "divv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (div:V4SF
          (match_operand:V4SF 1 "register_operand" "")
          (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);
})
376
 
377
;; Packed V4SF divide (divps).  Operand 1 is tied to the destination;
;; operand 2 may be a register or memory.
(define_insn "*divv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (div:V4SF
          (match_operand:V4SF 1 "register_operand" "0")
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "divps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssediv")
   (set_attr "mode" "V4SF")])
385
 
386
;; Scalar divide on a V4SF register (divss).  The vec_merge mask of 1
;; takes element 0 from the quotient and the other elements from
;; operand 1.
(define_insn "sse_vmdivv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (div:V4SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "divss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssediv")
   (set_attr "mode" "SF")])
397
 
398
;; Packed V4SF operation represented as UNSPEC_RCP; emits rcpps.
(define_insn "sse_rcpv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF
          [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
          UNSPEC_RCP))]
  "TARGET_SSE"
  "rcpps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])
406
 
407
;; Scalar form of UNSPEC_RCP (rcpss).  The vec_merge mask of 1 takes
;; element 0 from the unspec result and the other elements from
;; operand 2, which is tied to the destination.
(define_insn "sse_vmrcpv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (unspec:V4SF
            [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
            UNSPEC_RCP)
          (match_operand:V4SF 2 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE"
  "rcpss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])
418
 
419
;; Packed V4SF operation represented as UNSPEC_RSQRT; emits rsqrtps.
(define_insn "sse_rsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF
          [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
          UNSPEC_RSQRT))]
  "TARGET_SSE"
  "rsqrtps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])
427
 
428
;; Scalar form of UNSPEC_RSQRT (rsqrtss).  The vec_merge mask of 1 takes
;; element 0 from the unspec result and the other elements from
;; operand 2, which is tied to the destination.
(define_insn "sse_vmrsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (unspec:V4SF
            [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
            UNSPEC_RSQRT)
          (match_operand:V4SF 2 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE"
  "rsqrtss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])
439
 
440
;; Packed V4SF square root (sqrtps); the source may be a register or
;; memory.
(define_insn "sqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (sqrt:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "sqrtps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])
447
 
448
;; Scalar square root on a V4SF register (sqrtss).  The vec_merge mask
;; of 1 takes element 0 from the square root and the other elements from
;; operand 2, which is tied to the destination.
(define_insn "sse_vmsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (sqrt:V4SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
          (match_operand:V4SF 2 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE"
  "sqrtss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])
458
 
459
;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
460
;; isn't really correct, as those rtl operators aren't defined when
461
;; applied to NaNs.  Hopefully the optimizers won't get too smart on us.
462
 
463
;; V4SF signed-maximum expander.  Unless flag_finite_math_only is set,
;; operand 1 is forced into a register before the usual operand fixup.
(define_expand "smaxv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (smax:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "")
          (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  /* The non-finite insn below requires a register for operand 1.  */
  if (!flag_finite_math_only)
    operands[1] = force_reg (V4SFmode, operands[1]);

  ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
})
473
 
474
;; Packed V4SF maximum (maxps), enabled only under
;; flag_finite_math_only.  Operand 1 is marked commutative ("%").
(define_insn "*smaxv4sf3_finite"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (smax:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "%0")
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && flag_finite_math_only
   && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
  "maxps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])
483
 
484
;; Packed V4SF maximum (maxps), non-commutative form: operand 1 must be
;; a register and is tied to the destination.
(define_insn "*smaxv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (smax:V4SF
          (match_operand:V4SF 1 "register_operand" "0")
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "maxps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])
492
 
493
;; Scalar maximum on a V4SF register (maxss).  The vec_merge mask of 1
;; takes element 0 from the maximum and the other elements from
;; operand 1.
(define_insn "sse_vmsmaxv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (smax:V4SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "maxss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])
504
 
505
;; V4SF signed-minimum expander.  Unless flag_finite_math_only is set,
;; operand 1 is forced into a register before the usual operand fixup.
(define_expand "sminv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (smin:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "")
          (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  /* The non-finite insn below requires a register for operand 1.  */
  if (!flag_finite_math_only)
    operands[1] = force_reg (V4SFmode, operands[1]);

  ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
})
515
 
516
;; Packed V4SF minimum (minps), enabled only under
;; flag_finite_math_only.  Operand 1 is marked commutative ("%").
(define_insn "*sminv4sf3_finite"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (smin:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "%0")
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && flag_finite_math_only
   && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
  "minps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])
525
 
526
;; Packed V4SF minimum (minps), non-commutative form: operand 1 must be
;; a register and is tied to the destination.
(define_insn "*sminv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (smin:V4SF
          (match_operand:V4SF 1 "register_operand" "0")
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "minps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])
534
 
535
;; Scalar minimum on a V4SF register (minss).  The vec_merge mask of 1
;; takes element 0 from the minimum and the other elements from
;; operand 1.
(define_insn "sse_vmsminv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (smin:V4SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "minss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])
546
 
547
;; These versions of the min/max patterns implement exactly the operations
548
;;   min = (op1 < op2 ? op1 : op2)
549
;;   max = (!(op1 < op2) ? op1 : op2)
550
;; Their operands are not commutative, and thus they may be used in the
551
;; presence of -0.0 and NaN.
552
 
553
;; Non-commutative V4SF minimum represented as UNSPEC_IEEE_MIN; emits
;; minps with operand 1 tied to the destination.
(define_insn "*ieee_sminv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF
          [(match_operand:V4SF 1 "register_operand" "0")
           (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
          UNSPEC_IEEE_MIN))]
  "TARGET_SSE"
  "minps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])
562
 
563
;; Non-commutative V4SF maximum represented as UNSPEC_IEEE_MAX; emits
;; maxps with operand 1 tied to the destination.
(define_insn "*ieee_smaxv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF
          [(match_operand:V4SF 1 "register_operand" "0")
           (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
          UNSPEC_IEEE_MAX))]
  "TARGET_SSE"
  "maxps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])
572
 
573
;; Non-commutative V2DF minimum represented as UNSPEC_IEEE_MIN; emits
;; minpd with operand 1 tied to the destination.
(define_insn "*ieee_sminv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (unspec:V2DF
          [(match_operand:V2DF 1 "register_operand" "0")
           (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
          UNSPEC_IEEE_MIN))]
  "TARGET_SSE2"
  "minpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])
582
 
583
;; Non-commutative V2DF maximum represented as UNSPEC_IEEE_MAX; emits
;; maxpd with operand 1 tied to the destination.
(define_insn "*ieee_smaxv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (unspec:V2DF
          [(match_operand:V2DF 1 "register_operand" "0")
           (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
          UNSPEC_IEEE_MAX))]
  "TARGET_SSE2"
  "maxpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])
592
 
593
;; SSE3 addsubps on V4SF.  The instruction subtracts in elements 0 and 2
;; and adds in elements 1 and 3.  In a vec_merge, a set mask bit selects
;; the element from the first vector (the sum here) and bit 0 is
;; element 0, so the mask must have bits 1 and 3 set: 0b1010 = 10.
;; The previous mask of 5 described the opposite selection.
(define_insn "sse3_addsubv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (plus:V4SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (minus:V4SF (match_dup 1) (match_dup 2))
          (const_int 10)))]
  "TARGET_SSE3"
  "addsubps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])
605
 
606
;; SSE3 horizontal add on V4SF (haddps).  With a = operand 1 and
;; b = operand 2 the result is { a0+a1, a2+a3, b0+b1, b2+b3 }.
(define_insn "sse3_haddv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_concat:V4SF
          ;; Low half: pairwise sums of operand 1.
          (vec_concat:V2SF
            (plus:SF
              (vec_select:SF (match_operand:V4SF 1 "register_operand" "0")
                             (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
            (plus:SF
              (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
          ;; High half: pairwise sums of operand 2.
          (vec_concat:V2SF
            (plus:SF
              (vec_select:SF (match_operand:V4SF 2 "nonimmediate_operand" "xm")
                             (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
            (plus:SF
              (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSE3"
  "haddps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])
631
 
632
;; SSE3 horizontal subtract on V4SF (hsubps).  With a = operand 1 and
;; b = operand 2 the result is { a0-a1, a2-a3, b0-b1, b2-b3 }.
(define_insn "sse3_hsubv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_concat:V4SF
          ;; Low half: pairwise differences of operand 1.
          (vec_concat:V2SF
            (minus:SF
              (vec_select:SF (match_operand:V4SF 1 "register_operand" "0")
                             (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
            (minus:SF
              (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
          ;; High half: pairwise differences of operand 2.
          (vec_concat:V2SF
            (minus:SF
              (vec_select:SF (match_operand:V4SF 2 "nonimmediate_operand" "xm")
                             (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
            (minus:SF
              (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSE3"
  "hsubps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])
657
 
658
;; Sum reduction of a V4SF vector.  Without SSE3 the generic helper
;; ix86_expand_reduc_v4sf is used with addv4sf3; with SSE3 two haddps
;; instructions are emitted through a fresh temporary register.
(define_expand "reduc_splus_v4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")]
  "TARGET_SSE"
{
  if (!TARGET_SSE3)
    ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
  else
    {
      rtx partial = gen_reg_rtx (V4SFmode);

      emit_insn (gen_sse3_haddv4sf3 (partial, operands[1], operands[1]));
      emit_insn (gen_sse3_haddv4sf3 (operands[0], partial, partial));
    }
  DONE;
})
673
 
674
;; Maximum reduction of a V4SF vector, built by ix86_expand_reduc_v4sf
;; from the smaxv4sf3 generator.
(define_expand "reduc_smax_v4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")]
  "TARGET_SSE"
  "ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
   DONE;")
682
 
683
;; Minimum reduction of a V4SF vector, built by ix86_expand_reduc_v4sf
;; from the sminv4sf3 generator.
(define_expand "reduc_smin_v4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")]
  "TARGET_SSE"
  "ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
   DONE;")
691
 
692
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
693
;;
694
;; Parallel single-precision floating point comparisons
695
;;
696
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
697
 
698
;; Packed V4SF comparison.  Operator 3 must satisfy
;; sse_comparison_operator; the mnemonic is built as cmp<D3>ps, with the
;; "D" modifier applied to operator 3.
(define_insn "sse_maskcmpv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (match_operator:V4SF 3 "sse_comparison_operator"
          [(match_operand:V4SF 1 "register_operand" "0")
           (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
  "TARGET_SSE"
  "cmp%D3ps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "V4SF")])
707
 
708
;; Scalar V4SF comparison (cmp<D3>ss).  The vec_merge mask of 1 takes
;; element 0 from the comparison and the other elements from operand 1.
;; Both inputs must be registers in this pattern.
(define_insn "sse_vmmaskcmpv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (match_operator:V4SF 3 "sse_comparison_operator"
            [(match_operand:V4SF 1 "register_operand" "0")
             (match_operand:V4SF 2 "register_operand" "x")])
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "cmp%D3ss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "SF")])
720
 
721
;; Compare element 0 of operand 0 with element 0 of operand 1 and set
;; FLAGS_REG in CCFPmode; emits comiss.
(define_insn "sse_comi"
  [(set (reg:CCFP FLAGS_REG)
        (compare:CCFP
          (vec_select:SF (match_operand:V4SF 0 "register_operand" "x")
                         (parallel [(const_int 0)]))
          (vec_select:SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")
                         (parallel [(const_int 0)]))))]
  "TARGET_SSE"
  "comiss\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "SF")])
734
 
735
;; Compare element 0 of operand 0 with element 0 of operand 1 and set
;; FLAGS_REG in CCFPUmode; emits ucomiss.
(define_insn "sse_ucomi"
  [(set (reg:CCFPU FLAGS_REG)
        (compare:CCFPU
          (vec_select:SF (match_operand:V4SF 0 "register_operand" "x")
                         (parallel [(const_int 0)]))
          (vec_select:SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")
                         (parallel [(const_int 0)]))))]
  "TARGET_SSE"
  "ucomiss\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "SF")])
748
 
749
;; V4SF vector conditional: operand 0 = (op4 <operator 3> op5) ? op1 : op2.
;; Expansion is attempted by ix86_expand_fp_vcond; if that returns zero
;; the expander fails.
(define_expand "vcondv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (if_then_else:V4SF
          (match_operator 3 ""
            [(match_operand:V4SF 4 "nonimmediate_operand" "")
             (match_operand:V4SF 5 "nonimmediate_operand" "")])
          (match_operand:V4SF 1 "general_operand" "")
          (match_operand:V4SF 2 "general_operand" "")))]
  "TARGET_SSE"
{
  if (!ix86_expand_fp_vcond (operands))
    FAIL;
  DONE;
})
764
 
765
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
766
;;
767
;; Parallel single-precision floating point logical operations
768
;;
769
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
770
 
771
;; V4SF bitwise-AND expander.  The operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy; the RTL template is then emitted.
(define_expand "andv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (and:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "")
          (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);
})
777
 
778
;; V4SF bitwise AND (andps).  Operand 1 is marked commutative ("%") and
;; tied to the destination; operand 2 may be a register or memory.
(define_insn "*andv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (and:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "%0")
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
  "andps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])
786
 
787
;; V4SF and-not (andnps): the complement of operand 1 ANDed with
;; operand 2.  Operand 1 is tied to the destination.
(define_insn "sse_nandv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (and:V4SF
          (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "andnps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])
795
 
796
;; V4SF bitwise-OR expander.  The operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy; the RTL template is then emitted.
(define_expand "iorv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (ior:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "")
          (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);
})
802
 
803
;; V4SF bitwise OR (orps).  Operand 1 is marked commutative ("%") and
;; tied to the destination; operand 2 may be a register or memory.
(define_insn "*iorv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (ior:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "%0")
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
  "orps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])
811
 
812
;; V4SF bitwise-XOR expander.  The operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy; the RTL template is then emitted.
(define_expand "xorv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (xor:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "")
          (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);
})
818
 
819
;; Packed single-precision bitwise XOR, emitted as xorps.  Operand 1 is
;; tied to the destination; "%" marks operands 1 and 2 as commutative.
(define_insn "*xorv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (xor:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "%0")
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
  "xorps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog") (set_attr "mode" "V4SF")])
827
 
828
;; Also define scalar versions.  These are used for abs, neg, and
;; conditional move.  Using subregs into vector modes causes register
;; allocation lossage.  These patterns do not allow memory operands
;; because the native instructions read the full 128 bits.
832
 
833
;; Scalar SF bitwise AND performed with the packed andps instruction.
;; Both inputs must be registers; no memory alternative is offered.
(define_insn "*andsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
        (and:SF
          (match_operand:SF 1 "register_operand" "0")
          (match_operand:SF 2 "register_operand" "x")))]
  "TARGET_SSE"
  "andps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog") (set_attr "mode" "V4SF")])
841
 
842
;; Scalar SF and-not performed with the packed andnps instruction:
;; (~operand 1) & operand 2.  Both inputs must be registers.
(define_insn "*nandsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
        (and:SF
          (not:SF (match_operand:SF 1 "register_operand" "0"))
          (match_operand:SF 2 "register_operand" "x")))]
  "TARGET_SSE"
  "andnps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog") (set_attr "mode" "V4SF")])
850
 
851
;; Scalar SF bitwise OR performed with the packed orps instruction.
;; Both inputs must be registers; no memory alternative is offered.
(define_insn "*iorsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
        (ior:SF
          (match_operand:SF 1 "register_operand" "0")
          (match_operand:SF 2 "register_operand" "x")))]
  "TARGET_SSE"
  "orps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog") (set_attr "mode" "V4SF")])
859
 
860
;; Scalar SF bitwise XOR performed with the packed xorps instruction.
;; Both inputs must be registers; no memory alternative is offered.
(define_insn "*xorsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
        (xor:SF
          (match_operand:SF 1 "register_operand" "0")
          (match_operand:SF 2 "register_operand" "x")))]
  "TARGET_SSE"
  "xorps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog") (set_attr "mode" "V4SF")])
868
 
869
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
870
;;
871
;; Parallel single-precision floating point conversion operations
872
;;
873
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
874
 
875
;; cvtpi2ps: convert the two SImode elements of operand 2 to SF and
;; merge them into elements 0 and 1 (mask 3) of operand 1; elements 2
;; and 3 keep the value of operand 1, which is tied to the destination.
(define_insn "sse_cvtpi2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:V2SF
              (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
          (match_operand:V4SF 1 "register_operand" "0")
          (const_int 3)))]
  "TARGET_SSE"
  "cvtpi2ps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecvt") (set_attr "mode" "V4SF")])
886
 
887
;; cvtps2pi: convert elements 0 and 1 of operand 1 to a V2SI result.
;; The conversion is represented as UNSPEC_FIX_NOTRUNC rather than as
;; a truncating "fix".
(define_insn "sse_cvtps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_select:V2SI
          (unspec:V4SI
            [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
            UNSPEC_FIX_NOTRUNC)
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE"
  "cvtps2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "mode" "DI")])
898
 
899
;; cvttps2pi: truncating ("fix") conversion of elements 0 and 1 of
;; operand 1 to a V2SI result.
(define_insn "sse_cvttps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_select:V2SI
          (fix:V4SI
            (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE"
  "cvttps2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "mode" "SF")])
909
 
910
;; cvtsi2ss: convert SImode operand 2 to SF and place it in element 0
;; (mask 1) of operand 1; the other elements keep operand 1's value.
;; The register and memory sources are separate alternatives so that
;; athlon_decode can differ between them.
(define_insn "sse_cvtsi2ss"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:SF
              (match_operand:SI 2 "nonimmediate_operand" "r,m")))
          (match_operand:V4SF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE"
  "cvtsi2ss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "vector,double")
   (set_attr "mode" "SF")])
922
 
923
;; cvtsi2ssq: 64-bit-only variant; convert DImode operand 2 to SF and
;; place it in element 0 (mask 1) of operand 1, keeping the other
;; elements of operand 1.
(define_insn "sse_cvtsi2ssq"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:SF
              (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
          (match_operand:V4SF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE && TARGET_64BIT"
  "cvtsi2ssq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "vector,double")
   (set_attr "mode" "SF")])
935
 
936
;; cvtss2si: convert element 0 of operand 1 to SImode.  Represented as
;; UNSPEC_FIX_NOTRUNC rather than as a truncating "fix".
(define_insn "sse_cvtss2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (unspec:SI
          [(vec_select:SF
             (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE"
  "cvtss2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "mode" "SI")])
948
 
949
;; cvtss2siq: 64-bit-only variant; convert element 0 of operand 1 to
;; DImode.  Represented as UNSPEC_FIX_NOTRUNC rather than "fix".
(define_insn "sse_cvtss2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (unspec:DI
          [(vec_select:SF
             (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE && TARGET_64BIT"
  "cvtss2siq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "mode" "DI")])
961
 
962
;; cvttss2si: truncating ("fix") conversion of element 0 of operand 1
;; to SImode.
(define_insn "sse_cvttss2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (fix:SI
          (vec_select:SF
            (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE"
  "cvttss2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "mode" "SI")])
973
 
974
;; cvttss2siq: 64-bit-only variant; truncating ("fix") conversion of
;; element 0 of operand 1 to DImode.
(define_insn "sse_cvttss2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (fix:DI
          (vec_select:SF
            (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE && TARGET_64BIT"
  "cvttss2siq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "mode" "DI")])
985
 
986
;; cvtdq2ps: convert four SImode elements to four SF elements.
;; The result is V4SF, so the "mode" attribute is V4SF (it was
;; mistakenly V2DF, which describes a packed-double operation).
(define_insn "sse2_cvtdq2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "cvtdq2ps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")])
993
 
994
;; cvtps2dq: convert four SF elements to four SImode elements.
;; Represented as UNSPEC_FIX_NOTRUNC rather than a truncating "fix".
(define_insn "sse2_cvtps2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (unspec:V4SI
          [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "cvtps2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt") (set_attr "mode" "TI")])
1002
 
1003
;; cvttps2dq: truncating ("fix") conversion of four SF elements to
;; four SImode elements.
(define_insn "sse2_cvttps2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (fix:V4SI
          (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "cvttps2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt") (set_attr "mode" "TI")])
1010
 
1011
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1012
;;
1013
;; Parallel single-precision floating point element swizzling
1014
;;
1015
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1016
 
1017
;; Result is { op2[2], op2[3], op1[2], op1[3] }: the high half of
;; operand 2 replaces the low half of operand 1.  Alternatives:
;;   reg <- reg     movhlps
;;   reg <- memory  movlps from the high half of operand 2 (%H2)
;;   memory <- reg  movhps store
;; The condition rejects operands 1 and 2 both being memory.
(define_insn "sse_movhlps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,m")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
            (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
          (parallel
            [(const_int 6) (const_int 7) (const_int 2) (const_int 3)])))]
  "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movhlps\t{%2, %0|%0, %2}
   movlps\t{%H2, %0|%0, %H2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF,V2SF,V2SF")])
1034
 
1035
;; Result is { op1[0], op1[1], op2[0], op2[1] }: the low half of
;; operand 2 replaces the high half of operand 1.  Alternatives:
;;   reg <- reg     movlhps
;;   reg <- memory  movhps load
;;   memory <- reg  movlps store into the high half of operand 0 (%H0)
(define_insn "sse_movlhps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,o")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
            (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
          (parallel
            [(const_int 0) (const_int 1) (const_int 4) (const_int 5)])))]
  "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
  "@
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}
   movlps\t{%2, %H0|%H0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF,V2SF,V2SF")])
1052
 
1053
;; unpckhps: interleave the high halves of operands 1 and 2, giving
;; { op1[2], op2[2], op1[3], op2[3] }.
(define_insn "sse_unpckhps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (parallel
            [(const_int 2) (const_int 6) (const_int 3) (const_int 7)])))]
  "TARGET_SSE"
  "unpckhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog") (set_attr "mode" "V4SF")])
1065
 
1066
;; unpcklps: interleave the low halves of operands 1 and 2, giving
;; { op1[0], op2[0], op1[1], op2[1] }.
(define_insn "sse_unpcklps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (parallel
            [(const_int 0) (const_int 4) (const_int 1) (const_int 5)])))]
  "TARGET_SSE"
  "unpcklps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog") (set_attr "mode" "V4SF")])
1078
 
1079
;; These are modeled with the same vec_concat as the others so that we
1080
;; capture users of shufps that can use the new instructions
1081
;; movshdup: duplicate the odd elements of operand 1, giving
;; { op1[1], op1[1], op1[3], op1[3] }.  The source is concatenated with
;; itself (match_dup), so indices 7 select op1[3].
(define_insn "sse3_movshdup"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (match_dup 1))
          (parallel
            [(const_int 1) (const_int 1) (const_int 7) (const_int 7)])))]
  "TARGET_SSE3"
  "movshdup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse") (set_attr "mode" "V4SF")])
1095
 
1096
;; movsldup: duplicate the even elements of operand 1, giving
;; { op1[0], op1[0], op1[2], op1[2] }.  The source is concatenated with
;; itself (match_dup), so indices 6 select op1[2].
(define_insn "sse3_movsldup"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (match_dup 1))
          (parallel
            [(const_int 0) (const_int 0) (const_int 6) (const_int 6)])))]
  "TARGET_SSE3"
  "movsldup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse") (set_attr "mode" "V4SF")])
1110
 
1111
;; Expander for shufps with an 8-bit immediate (operand 3).  The
;; immediate is split into four 2-bit selectors.  The first two index
;; operand 1 (values 0-3); the last two index operand 2 and are biased
;; by 4 so that they address the second half of the V8SF concatenation
;; used by sse_shufps_1.
(define_expand "sse_shufps"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")
   (match_operand:V4SF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_SSE"
{
  int imm = INTVAL (operands[3]);
  rtx sel0 = GEN_INT ((imm >> 0) & 3);
  rtx sel1 = GEN_INT ((imm >> 2) & 3);
  rtx sel2 = GEN_INT (((imm >> 4) & 3) + 4);
  rtx sel3 = GEN_INT (((imm >> 6) & 3) + 4);

  emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
                               sel0, sel1, sel2, sel3));
  DONE;
})
1126
 
1127
;; shufps in decomposed form: operands 3 and 4 select from operand 1
;; (indices 0-3), operands 5 and 6 select from operand 2 (indices 4-7
;; of the concatenation).  The output code folds the four selectors
;; back into the single 8-bit immediate and stores it in operands[3]
;; for printing.
(define_insn "sse_shufps_1"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (parallel
            [(match_operand 3 "const_0_to_3_operand" "")
             (match_operand 4 "const_0_to_3_operand" "")
             (match_operand 5 "const_4_to_7_operand" "")
             (match_operand 6 "const_4_to_7_operand" "")])))]
  "TARGET_SSE"
{
  /* Two bits per selector; remove the +4 bias on the operand-2 ones.  */
  int imm8 = ((INTVAL (operands[3]) << 0)
              | (INTVAL (operands[4]) << 2)
              | ((INTVAL (operands[5]) - 4) << 4)
              | ((INTVAL (operands[6]) - 4) << 6));

  operands[3] = GEN_INT (imm8);
  return "shufps\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sselog") (set_attr "mode" "V4SF")])
1150
 
1151
;; Extract the high half (elements 2 and 3) of operand 1 as V2SF.
;; Alternatives:
;;   memory <- reg  movhps store
;;   reg <- reg     movhlps
;;   reg <- memory  movlps from the high half of operand 1 (%H1)
(define_insn "sse_storehps"
  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:V2SF
          (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
          (parallel [(const_int 2) (const_int 3)])))]
  "TARGET_SSE"
  "@
   movhps\t{%1, %0|%0, %1}
   movhlps\t{%1, %0|%0, %1}
   movlps\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
1163
 
1164
;; Replace the high half of operand 1 with V2SF operand 2, keeping
;; elements 0 and 1 of operand 1.  Alternatives:
;;   reg <- memory  movhps load
;;   reg <- reg     movlhps
;;   memory <- reg  movlps store into the high half of operand 0 (%H0)
(define_insn "sse_loadhps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
        (vec_concat:V4SF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
            (parallel [(const_int 0) (const_int 1)]))
          (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
  "TARGET_SSE"
  "@
   movhps\t{%2, %0|%0, %2}
   movlhps\t{%2, %0|%0, %2}
   movlps\t{%2, %H0|%H0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
1178
 
1179
;; Extract the low half (elements 0 and 1) of operand 1 as V2SF.
;; Alternatives:
;;   memory <- reg  movlps store
;;   reg <- reg     movaps (whole-register copy)
;;   reg <- memory  movlps load
(define_insn "sse_storelps"
  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:V2SF
          (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE"
  "@
   movlps\t{%1, %0|%0, %1}
   movaps\t{%1, %0|%0, %1}
   movlps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
1191
 
1192
;; Replace the low half of operand 1 with V2SF operand 2, keeping
;; elements 2 and 3 of operand 1.  Alternatives:
;;   operand 2 already in the destination: shufps 0xe4 pulls the
;;     high half in from operand 1
;;   reg <- memory  movlps load (operand 1 tied to destination)
;;   memory <- reg  movlps store (operand 1 tied to destination)
(define_insn "sse_loadlps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
        (vec_concat:V4SF
          (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_SSE"
  "@
   shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
   movlps\t{%2, %0|%0, %2}
   movlps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "mode" "V4SF,V2SF,V2SF")])
1206
 
1207
;; movss (register form): element 0 comes from operand 2 (mask 1);
;; elements 1-3 keep operand 1, which is tied to the destination.
(define_insn "sse_movss"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (match_operand:V4SF 2 "register_operand" "x")
          (match_operand:V4SF 1 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE"
  "movss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemov") (set_attr "mode" "SF")])
1217
 
1218
;; Broadcast SF operand 1 to all four elements.  The scalar must
;; already live in the destination register; shufps with immediate 0
;; then replicates element 0.
(define_insn "*vec_dupv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_duplicate:V4SF
          (match_operand:SF 1 "register_operand" "0")))]
  "TARGET_SSE"
  "shufps\t{$0, %0, %0|%0, %0, 0}"
  [(set_attr "type" "sselog1") (set_attr "mode" "V4SF")])
1226
 
1227
;; ??? In theory we can match memory for the MMX alternative, but allowing
1228
;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1229
;; alternatives pretty much forces the MMX alternative to be chosen.
1230
;; Build a V2SF from two SF values.  The first two alternatives use SSE
;; registers (unpcklps, or movss from memory when operand 2 matches
;; "C"); the last two are the MMX equivalents (punpckldq / movd) and
;; are marked "*" in their constraints.
(define_insn "*sse_concatv2sf"
  [(set (match_operand:V2SF 0 "register_operand"     "=x,x,*y,*y")
        (vec_concat:V2SF
          (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
          (match_operand:SF 2 "reg_or_0_operand"     " x,C,*y, C")))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   movss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "V4SF,SF,DI,DI")])
1243
 
1244
;; Build a V4SF from two V2SF halves.  Operand 1 (the low half) is
;; already in the destination; operand 2 supplies the high half via
;; movlhps (register) or movhps (memory).
(define_insn "*sse_concatv4sf"
  [(set (match_operand:V4SF 0 "register_operand"   "=x,x")
        (vec_concat:V4SF
          (match_operand:V2SF 1 "register_operand" " 0,0")
          (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
  "TARGET_SSE"
  "@
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF,V2SF")])
1255
 
1256
;; Named expander for V4SF vector initialization.  All work is
;; delegated to ix86_expand_vector_init, called with its first
;; argument false; no RTL template is emitted here (DONE).
(define_expand "vec_initv4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  rtx target = operands[0];
  rtx vals = operands[1];

  ix86_expand_vector_init (false, target, vals);
  DONE;
})
1264
 
1265
;; Set element 0 of a V4SF from SF operand 2 (mask 1); the remaining
;; elements come from operand 1.  The first three alternatives emit
;; movss or movd directly.  The last alternative has a memory
;; destination and outputs "#", so it must be split after reload.
(define_insn "*vec_setv4sf_0"
  [(set (match_operand:V4SF 0 "nonimmediate_operand"  "=x,x,Y ,m")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (match_operand:SF 2 "general_operand"     " x,m,*r,x*rfF"))
          (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   movss\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   movd\t{%2, %0|%0, %2}
   #"
  [(set_attr "type" "ssemov") (set_attr "mode" "SF")])
1280
 
1281
;; Post-reload split for setting element 0 of a V4SF that lives in
;; memory: rewrite it as a plain SFmode store to offset 0 of that
;; memory location.
(define_split
  [(set (match_operand:V4SF 0 "memory_operand" "")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (match_operand:SF 1 "nonmemory_operand" ""))
          (match_dup 0)
          (const_int 1)))]
  "TARGET_SSE && reload_completed"
  [(const_int 0)]
{
  /* View the first four bytes of the vector slot as an SF.  */
  rtx low = adjust_address (operands[0], SFmode, 0);

  emit_move_insn (low, operands[1]);
  DONE;
})
1294
 
1295
;; Named expander for storing SF operand 1 into element number
;; operand 2 of V4SF operand 0.  Delegates to ix86_expand_vector_set,
;; called with its first argument false.
(define_expand "vec_setv4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:SF 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  rtx target = operands[0];
  rtx val = operands[1];

  ix86_expand_vector_set (false, target, val, INTVAL (operands[2]));
  DONE;
})
1305
 
1306
;; Extract element 0 of a V4SF.  Always output as "#" and split after
;; reload into an ordinary SFmode move: a register source is re-viewed
;; as the same hard register in SFmode, anything else goes through
;; gen_lowpart.  Memory-to-memory is rejected by the condition.
(define_insn_and_split "*vec_extractv4sf_0"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
        (vec_select:SF
          (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
          (parallel [(const_int 0)])))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx src = operands[1];

  src = (REG_P (src)
         ? gen_rtx_REG (SFmode, REGNO (src))
         : gen_lowpart (SFmode, src));
  emit_move_insn (operands[0], src);
  DONE;
})
1324
 
1325
;; Named expander for reading element number operand 2 of V4SF
;; operand 1 into SF operand 0.  Delegates to
;; ix86_expand_vector_extract, called with its first argument false.
(define_expand "vec_extractv4sf"
  [(match_operand:SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  rtx target = operands[0];
  rtx vec = operands[1];

  ix86_expand_vector_extract (false, target, vec, INTVAL (operands[2]));
  DONE;
})
1335
 
1336
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1337
;;
1338
;; Parallel double-precision floating point arithmetic
1339
;;
1340
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1341
 
1342
;; Named expander for V2DF negation; the expansion is produced
;; entirely by ix86_expand_fp_absneg_operator (DONE follows it).
(define_expand "negv2df2"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (neg:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
1347
 
1348
;; Named expander for V2DF absolute value; the expansion is produced
;; entirely by ix86_expand_fp_absneg_operator (DONE follows it).
(define_expand "absv2df2"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (abs:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
1353
 
1354
;; Named expander for V2DF addition.  The preparation statement hands
;; the operands to ix86_fixup_binary_operands_no_copy before the
;; (set ...) template is emitted.
(define_expand "addv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (plus:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "")
          (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
1360
 
1361
;; Packed double-precision add (addpd).  Operand 1 is tied to the
;; destination; "%" marks operands 1 and 2 as commutative.
(define_insn "*addv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (plus:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "%0")
          (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
  "addpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd") (set_attr "mode" "V2DF")])
1369
 
1370
;; addsd on vector operands: element 0 is op1[0] + op2[0] (mask 1),
;; element 1 is copied from operand 1, which is tied to the
;; destination.  The condition now passes V2DFmode, the mode of this
;; pattern; it previously passed V4SFmode.
(define_insn "sse2_vmaddv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
                     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
  "addsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "DF")])
1381
 
1382
;; Named expander for V2DF subtraction.  The preparation statement
;; hands the operands to ix86_fixup_binary_operands_no_copy before the
;; (set ...) template is emitted.
(define_expand "subv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (minus:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "")
          (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
1388
 
1389
;; Packed double-precision subtract (subpd).  Not commutative, so
;; operand 1 must be the destination register.
(define_insn "*subv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (minus:V2DF
          (match_operand:V2DF 1 "register_operand" "0")
          (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "subpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd") (set_attr "mode" "V2DF")])
1397
 
1398
;; subsd on vector operands: element 0 is op1[0] - op2[0] (mask 1),
;; element 1 is copied from operand 1 (tied to the destination).
(define_insn "sse2_vmsubv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (minus:V2DF
            (match_operand:V2DF 1 "register_operand" "0")
            (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2"
  "subsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd") (set_attr "mode" "DF")])
1409
 
1410
;; Named expander for V2DF multiplication.  The preparation statement
;; hands the operands to ix86_fixup_binary_operands_no_copy before the
;; (set ...) template is emitted.
(define_expand "mulv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (mult:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "")
          (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
1416
 
1417
;; Packed double-precision multiply (mulpd).  Operand 1 is tied to the
;; destination; "%" marks operands 1 and 2 as commutative.
(define_insn "*mulv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (mult:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "%0")
          (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
  "mulpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemul") (set_attr "mode" "V2DF")])
1425
 
1426
;; mulsd on vector operands: element 0 is op1[0] * op2[0] (mask 1),
;; element 1 is copied from operand 1 (tied to the destination).
(define_insn "sse2_vmmulv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (mult:V2DF
            (match_operand:V2DF 1 "register_operand" "0")
            (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
  "mulsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemul") (set_attr "mode" "DF")])
1437
 
1438
;; Named expander for V2DF division.  Operand 1 (the dividend) must
;; already be a register.  The preparation statement hands the
;; operands to ix86_fixup_binary_operands_no_copy.
(define_expand "divv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (div:V2DF
          (match_operand:V2DF 1 "register_operand" "")
          (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
1444
 
1445
;; Packed double-precision divide (divpd).  Not commutative, so
;; operand 1 must be the destination register.
(define_insn "*divv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (div:V2DF
          (match_operand:V2DF 1 "register_operand" "0")
          (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "divpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssediv") (set_attr "mode" "V2DF")])
1453
 
1454
;; divsd on vector operands: element 0 is op1[0] / op2[0] (mask 1),
;; element 1 is copied from operand 1 (tied to the destination).
(define_insn "sse2_vmdivv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (div:V2DF
            (match_operand:V2DF 1 "register_operand" "0")
            (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2"
  "divsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssediv") (set_attr "mode" "DF")])
1465
 
1466
;; Packed double-precision square root (sqrtpd); the source may be a
;; register or memory and need not match the destination.
(define_insn "sqrtv2df2"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (sqrt:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "sqrtpd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse") (set_attr "mode" "V2DF")])
1473
 
1474
;; sqrtsd on vector operands: element 0 is sqrt (op1[0]) (mask 1),
;; element 1 is copied from operand 2, which is tied to the
;; destination.  Operand 1 uses nonimmediate_operand so that the
;; predicate agrees with its "xm" constraint; with register_operand
;; the memory alternative could never be matched directly.
(define_insn "sse2_vmsqrtv2df2"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
          (match_operand:V2DF 2 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE2"
  "sqrtsd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "DF")])
1484
 
1485
;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1486
;; isn't really correct, as those rtl operators aren't defined when
1487
;; applied to NaNs.  Hopefully the optimizers won't get too smart on us.
1488
 
1489
;; Named expander for V2DF maximum.  Unless -ffinite-math-only is in
;; effect, operand 1 is forced into a register first: only the
;; "_finite" insn variant accepts a non-register operand 1.
(define_expand "smaxv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (smax:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "")
          (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  if (!flag_finite_math_only)
    {
      operands[1] = force_reg (V2DFmode, operands[1]);
    }

  ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
})
1499
 
1500
;; maxpd when flag_finite_math_only is set: operands 1 and 2 are
;; treated as commutative ("%").
(define_insn "*smaxv2df3_finite"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (smax:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "%0")
          (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && flag_finite_math_only
   && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
  "maxpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd") (set_attr "mode" "V2DF")])
1509
 
1510
;; maxpd without the finite-math assumption: operand order is fixed
;; (no "%"), and operand 1 must be the destination register.
(define_insn "*smaxv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (smax:V2DF
          (match_operand:V2DF 1 "register_operand" "0")
          (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "maxpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd") (set_attr "mode" "V2DF")])
1518
 
1519
;; maxsd on vector operands: element 0 is the smax of the two element-0
;; values (mask 1); element 1 is copied from operand 1 (tied to the
;; destination).
(define_insn "sse2_vmsmaxv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (smax:V2DF
            (match_operand:V2DF 1 "register_operand" "0")
            (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2"
  "maxsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd") (set_attr "mode" "DF")])
1530
 
1531
;; Named expander for V2DF minimum.  Unless -ffinite-math-only is in
;; effect, operand 1 is forced into a register first: only the
;; "_finite" insn variant accepts a non-register operand 1.
(define_expand "sminv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (smin:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "")
          (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  if (!flag_finite_math_only)
    {
      operands[1] = force_reg (V2DFmode, operands[1]);
    }

  ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
})
1541
 
1542
;; minpd when flag_finite_math_only is set: operands 1 and 2 are
;; treated as commutative ("%").
(define_insn "*sminv2df3_finite"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (smin:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "%0")
          (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && flag_finite_math_only
   && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
  "minpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd") (set_attr "mode" "V2DF")])
1551
 
1552
;; minpd without the finite-math assumption: operand order is fixed
;; (no "%"), and operand 1 must be the destination register.
(define_insn "*sminv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (smin:V2DF
          (match_operand:V2DF 1 "register_operand" "0")
          (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "minpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd") (set_attr "mode" "V2DF")])
1560
 
1561
;; minsd on vector operands: element 0 is the smin of the two element-0
;; values (mask 1); element 1 is copied from operand 1 (tied to the
;; destination).
(define_insn "sse2_vmsminv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (smin:V2DF
            (match_operand:V2DF 1 "register_operand" "0")
            (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2"
  "minsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd") (set_attr "mode" "DF")])
1572
 
1573
;; addsubpd: element 0 is op1[0] - op2[0], element 1 is
;; op1[1] + op2[1].  In vec_merge a set mask bit takes the element
;; from the first arm (plus), so the mask must be 2 (element 1 only).
;; It was previously 1, which described the add in element 0 and the
;; subtract in element 1 -- the opposite of what the instruction does.
(define_insn "sse3_addsubv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (plus:V2DF
            (match_operand:V2DF 1 "register_operand" "0")
            (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (minus:V2DF (match_dup 1) (match_dup 2))
          (const_int 2)))]
  "TARGET_SSE3"
  "addsubpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])
1585
 
1586
;; haddpd: horizontal add.  Element 0 is op1[0] + op1[1]; element 1 is
;; op2[0] + op2[1].  Operand 1 is tied to the destination.
(define_insn "sse3_haddv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_concat:V2DF
          (plus:DF
            (vec_select:DF
              (match_operand:V2DF 1 "register_operand" "0")
              (parallel [(const_int 0)]))
            (vec_select:DF
              (match_dup 1)
              (parallel [(const_int 1)])))
          (plus:DF
            (vec_select:DF
              (match_operand:V2DF 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0)]))
            (vec_select:DF
              (match_dup 2)
              (parallel [(const_int 1)])))))]
  "TARGET_SSE3"
  "haddpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd") (set_attr "mode" "V2DF")])
1603
 
1604
;; hsubpd: horizontal subtract.  Element 0 is op1[0] - op1[1];
;; element 1 is op2[0] - op2[1].  Operand 1 is tied to the destination.
(define_insn "sse3_hsubv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_concat:V2DF
          (minus:DF
            (vec_select:DF
              (match_operand:V2DF 1 "register_operand" "0")
              (parallel [(const_int 0)]))
            (vec_select:DF
              (match_dup 1)
              (parallel [(const_int 1)])))
          (minus:DF
            (vec_select:DF
              (match_operand:V2DF 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0)]))
            (vec_select:DF
              (match_dup 2)
              (parallel [(const_int 1)])))))]
  "TARGET_SSE3"
  "hsubpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd") (set_attr "mode" "V2DF")])
1621
 
1622
;; Sum reduction of a V2DF vector: emits a single sse3_haddv2df3 with
;; operand 1 supplied as both sources.
(define_expand "reduc_splus_v2df"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand:V2DF 1 "register_operand" "")]
  "TARGET_SSE3"
{
  rtx src = operands[1];

  emit_insn (gen_sse3_haddv2df3 (operands[0], src, src));
  DONE;
})
1630
 
1631
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1632
;;
1633
;; Parallel double-precision floating point comparisons
1634
;;
1635
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1636
 
1637
;; Packed double-precision compare.  Operator 3 must satisfy
;; sse_comparison_operator; it is spliced into the mnemonic through the
;; %D3 modifier.  Operand 1 is tied to the destination.
(define_insn "sse2_maskcmpv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (match_operator:V2DF 3 "sse_comparison_operator"
          [(match_operand:V2DF 1 "register_operand" "0")
           (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
  "TARGET_SSE2"
  "cmp%D3pd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "V2DF")])
1646
 
1647
;; Scalar double-precision compare in vector form.  The vec_merge mask
;; of 1 takes element 0 from the comparison result and the other
;; element unchanged from operand 1 (tied to the destination).
(define_insn "sse2_vmmaskcmpv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (match_operator:V2DF 3 "sse_comparison_operator"
            [(match_operand:V2DF 1 "register_operand" "0")
             (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2"
  "cmp%D3sd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "DF")])
1659
 
1660
;; comisd: sets FLAGS_REG (in CCFP mode) from comparing element 0 of
;; operand 0 with element 0 of operand 1.
(define_insn "sse2_comi"
  [(set (reg:CCFP FLAGS_REG)
        (compare:CCFP
          (vec_select:DF (match_operand:V2DF 0 "register_operand" "x")
                         (parallel [(const_int 0)]))
          (vec_select:DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")
                         (parallel [(const_int 0)]))))]
  "TARGET_SSE2"
  "comisd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "DF")])
1673
 
1674
;; ucomisd: sets FLAGS_REG (in CCFPU mode) from comparing element 0 of
;; operand 0 with element 0 of operand 1.
(define_insn "sse2_ucomi"
  [(set (reg:CCFPU FLAGS_REG)
        (compare:CCFPU
          (vec_select:DF (match_operand:V2DF 0 "register_operand" "x")
                         (parallel [(const_int 0)]))
          (vec_select:DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")
                         (parallel [(const_int 0)]))))]
  "TARGET_SSE2"
  "ucomisd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "DF")])
1687
 
1688
;; Vector conditional move for V2DF: operand 0 receives operand 1 or
;; operand 2 depending on comparison 3 applied to operands 4 and 5.
;; All of the work is delegated to ix86_expand_fp_vcond; the expansion
;; FAILs when that helper returns zero.
(define_expand "vcondv2df"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (if_then_else:V2DF
          (match_operator 3 ""
            [(match_operand:V2DF 4 "nonimmediate_operand" "")
             (match_operand:V2DF 5 "nonimmediate_operand" "")])
          (match_operand:V2DF 1 "general_operand" "")
          (match_operand:V2DF 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  if (!ix86_expand_fp_vcond (operands))
    FAIL;
  DONE;
})
1703
 
1704
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1705
;;
1706
;; Parallel double-precision floating point logical operations
1707
;;
1708
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1709
 
1710
;; Expander for V2DF bitwise AND.  The operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy.
(define_expand "andv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (and:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "")
          (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);
})
1716
 
1717
;; andpd: V2DF bitwise AND.  Operand 1 is commutative with operand 2
;; ("%") and tied to the destination.
(define_insn "*andv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (and:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "%0")
          (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
  "andpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])
1725
 
1726
;; andnpd: complement of operand 1 (tied to the destination) ANDed with
;; operand 2.
(define_insn "sse2_nandv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (and:V2DF
          (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
          (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "andnpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])
1734
 
1735
;; Expander for V2DF bitwise inclusive OR.  The operands are first
;; adjusted by ix86_fixup_binary_operands_no_copy.
(define_expand "iorv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (ior:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "")
          (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);
})
1741
 
1742
;; orpd: V2DF bitwise inclusive OR.  Operand 1 is commutative with
;; operand 2 ("%") and tied to the destination.
(define_insn "*iorv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (ior:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "%0")
          (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
  "orpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])
1750
 
1751
;; Expander for V2DF bitwise exclusive OR.  The operands are first
;; adjusted by ix86_fixup_binary_operands_no_copy.
(define_expand "xorv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (xor:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "")
          (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);
})
1757
 
1758
;; xorpd: V2DF bitwise exclusive OR.  Operand 1 is commutative with
;; operand 2 ("%") and tied to the destination.
(define_insn "*xorv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (xor:V2DF
          (match_operand:V2DF 1 "nonimmediate_operand" "%0")
          (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
  "xorpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])
1766
 
1767
;; Also define scalar versions.  These are used for abs, neg, and
1768
;; conditional move.  Using subregs into vector modes causes register
1769
;; allocation lossage.  These patterns do not allow memory operands
1770
;; because the native instructions read the full 128 bits.
1771
 
1772
;; Scalar DF bitwise AND, implemented with the packed andpd instruction.
;; Both inputs must be registers; operand 1 is tied to the destination.
(define_insn "*anddf3"
  [(set (match_operand:DF 0 "register_operand" "=x")
        (and:DF
          (match_operand:DF 1 "register_operand" "0")
          (match_operand:DF 2 "register_operand" "x")))]
  "TARGET_SSE2"
  "andpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])
1780
 
1781
;; Scalar DF and-not (complement of operand 1, ANDed with operand 2),
;; implemented with the packed andnpd instruction.  Both inputs must be
;; registers; operand 1 is tied to the destination.
(define_insn "*nanddf3"
  [(set (match_operand:DF 0 "register_operand" "=x")
        (and:DF
          (not:DF (match_operand:DF 1 "register_operand" "0"))
          (match_operand:DF 2 "register_operand" "x")))]
  "TARGET_SSE2"
  "andnpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])
1789
 
1790
;; Scalar DF bitwise inclusive OR, implemented with the packed orpd
;; instruction.  Both inputs must be registers; operand 1 is tied to the
;; destination.
(define_insn "*iordf3"
  [(set (match_operand:DF 0 "register_operand" "=x")
        (ior:DF
          (match_operand:DF 1 "register_operand" "0")
          (match_operand:DF 2 "register_operand" "x")))]
  "TARGET_SSE2"
  "orpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])
1798
 
1799
;; Scalar DF bitwise exclusive OR, implemented with the packed xorpd
;; instruction.  Both inputs must be registers; operand 1 is tied to the
;; destination.
(define_insn "*xordf3"
  [(set (match_operand:DF 0 "register_operand" "=x")
        (xor:DF
          (match_operand:DF 1 "register_operand" "0")
          (match_operand:DF 2 "register_operand" "x")))]
  "TARGET_SSE2"
  "xorpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])
1807
 
1808
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1809
;;
1810
;; Parallel double-precision floating point conversion operations
1811
;;
1812
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1813
 
1814
;; cvtpi2pd: converts a V2SI operand (MMX register "y" or memory) to
;; V2DF.  Only the MMX-register alternative is tagged with unit "mmx".
(define_insn "sse2_cvtpi2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (float:V2DF
          (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
  "TARGET_SSE2"
  "cvtpi2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx,*")
   (set_attr "mode" "V2DF")])
1822
 
1823
;; cvtpd2pi: V2DF to V2SI conversion into an MMX register, expressed as
;; UNSPEC_FIX_NOTRUNC (i.e. not a truncating `fix').
(define_insn "sse2_cvtpd2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (unspec:V2SI
          [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "cvtpd2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "mode" "DI")])
1832
 
1833
;; cvttpd2pi: truncating (`fix') V2DF to V2SI conversion into an MMX
;; register.
(define_insn "sse2_cvttpd2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (fix:V2SI
          (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "cvttpd2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "mode" "TI")])
1841
 
1842
;; cvtsi2sd: converts SImode operand 2 (general register or memory) to
;; DF and merges it into element 0 of operand 1, which is tied to the
;; destination; the other element is preserved.
(define_insn "sse2_cvtsi2sd"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (vec_merge:V2DF
          (vec_duplicate:V2DF
            (float:DF
              (match_operand:SI 2 "nonimmediate_operand" "r,m")))
          (match_operand:V2DF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE2"
  "cvtsi2sd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "mode" "DF")
   (set_attr "athlon_decode" "double,direct")])
1854
 
1855
;; cvtsi2sdq: 64-bit-only variant of sse2_cvtsi2sd taking a DImode
;; integer source.  The converted value is merged into element 0 of
;; operand 1, which is tied to the destination.
(define_insn "sse2_cvtsi2sdq"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (vec_merge:V2DF
          (vec_duplicate:V2DF
            (float:DF
              (match_operand:DI 2 "nonimmediate_operand" "r,m")))
          (match_operand:V2DF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE2 && TARGET_64BIT"
  "cvtsi2sdq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "mode" "DF")
   (set_attr "athlon_decode" "double,direct")])
1867
 
1868
;; cvtsd2si: converts element 0 of V2DF operand 1 to an SImode integer,
;; expressed as UNSPEC_FIX_NOTRUNC (i.e. not a truncating `fix').
(define_insn "sse2_cvtsd2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (unspec:SI
          [(vec_select:DF (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
                          (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "cvtsd2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "mode" "SI")])
1880
 
1881
;; cvtsd2siq: 64-bit-only variant of sse2_cvtsd2si producing a DImode
;; result from element 0 of operand 1.
(define_insn "sse2_cvtsd2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (unspec:DI
          [(vec_select:DF (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
                          (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2 && TARGET_64BIT"
  "cvtsd2siq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "mode" "DI")])
1893
 
1894
;; cvttsd2si: truncating (`fix') conversion of element 0 of V2DF
;; operand 1 to an SImode integer.
(define_insn "sse2_cvttsd2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (fix:SI
          (vec_select:DF (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
                         (parallel [(const_int 0)]))))]
  "TARGET_SSE2"
  "cvttsd2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "mode" "SI")
   (set_attr "athlon_decode" "double,vector")])
1905
 
1906
;; cvttsd2siq: 64-bit-only truncating (`fix') conversion of element 0 of
;; V2DF operand 1 to a DImode integer.
(define_insn "sse2_cvttsd2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (fix:DI
          (vec_select:DF (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
                         (parallel [(const_int 0)]))))]
  "TARGET_SSE2 && TARGET_64BIT"
  "cvttsd2siq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "mode" "DI")
   (set_attr "athlon_decode" "double,vector")])
1917
 
1918
;; cvtdq2pd: converts elements 0 and 1 of V4SI operand 1 to V2DF.
(define_insn "sse2_cvtdq2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (float:V2DF
          (vec_select:V2SI (match_operand:V4SI 1 "nonimmediate_operand" "xm")
                           (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "cvtdq2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])
1928
 
1929
;; Expander for cvtpd2dq.  The V4SI result is the V2SI conversion of
;; operand 1 (UNSPEC_FIX_NOTRUNC) concatenated with a V2SI zero vector,
;; which is supplied here as operand 2.
(define_expand "sse2_cvtpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (vec_concat:V4SI
          (unspec:V2SI
            [(match_operand:V2DF 1 "nonimmediate_operand" "")]
            UNSPEC_FIX_NOTRUNC)
          (match_dup 2)))]
  "TARGET_SSE2"
{
  operands[2] = CONST0_RTX (V2SImode);
})
1937
 
1938
;; cvtpd2dq: matches the non-truncating V2DF to V2SI conversion
;; concatenated with a constant-zero V2SI upper half (operand 2).
(define_insn "*sse2_cvtpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          (unspec:V2SI
            [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
            UNSPEC_FIX_NOTRUNC)
          (match_operand:V2SI 2 "const0_operand" "")))]
  "TARGET_SSE2"
  "cvtpd2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])
1948
 
1949
;; Expander for cvttpd2dq.  The V4SI result is the truncating V2SI
;; conversion of operand 1 concatenated with a V2SI zero vector, which
;; is supplied here as operand 2.
(define_expand "sse2_cvttpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (vec_concat:V4SI
          (fix:V2SI
            (match_operand:V2DF 1 "nonimmediate_operand" ""))
          (match_dup 2)))]
  "TARGET_SSE2"
{
  operands[2] = CONST0_RTX (V2SImode);
})
1956
 
1957
;; cvttpd2dq: matches the truncating V2DF to V2SI conversion
;; concatenated with a constant-zero V2SI upper half (operand 2).
(define_insn "*sse2_cvttpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          (fix:V2SI
            (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
          (match_operand:V2SI 2 "const0_operand" "")))]
  "TARGET_SSE2"
  "cvttpd2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])
1966
 
1967
;; cvtsd2ss: the float-truncated form of V2DF operand 2 is merged into
;; element 0 of V4SF operand 1 (tied to the destination); the vec_merge
;; mask of 1 leaves the remaining elements of operand 1 untouched.
(define_insn "sse2_cvtsd2ss"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float_truncate:V2SF
              (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
          (match_operand:V4SF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE2"
  "cvtsd2ss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "athlon_decode" "vector,double")
   (set_attr "mode" "SF")])
1980
 
1981
;; cvtss2sd: the float-extended low two elements of V4SF operand 2 are
;; merged into element 0 of V2DF operand 1 (tied to the destination);
;; the vec_merge mask of 1 leaves element 1 of operand 1 untouched.
(define_insn "sse2_cvtss2sd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (float_extend:V2DF
            (vec_select:V2SF (match_operand:V4SF 2 "nonimmediate_operand" "xm")
                             (parallel [(const_int 0) (const_int 1)])))
          (match_operand:V2DF 1 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE2"
  "cvtss2sd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "DF")])
1994
 
1995
;; Expander for cvtpd2ps.  The V4SF result is the float-truncated V2SF
;; form of operand 1 concatenated with a V2SF zero vector, which is
;; supplied here as operand 2.
(define_expand "sse2_cvtpd2ps"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (vec_concat:V4SF
          (float_truncate:V2SF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
          (match_dup 2)))]
  "TARGET_SSE2"
{
  operands[2] = CONST0_RTX (V2SFmode);
})
2003
 
2004
;; cvtpd2ps: matches the V2DF to V2SF float truncation concatenated
;; with a constant-zero V2SF upper half (operand 2).
(define_insn "*sse2_cvtpd2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_concat:V4SF
          (float_truncate:V2SF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
          (match_operand:V2SF 2 "const0_operand" "")))]
  "TARGET_SSE2"
  "cvtpd2ps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")])
2014
 
2015
;; cvtps2pd: float-extends elements 0 and 1 of V4SF operand 1 to V2DF.
(define_insn "sse2_cvtps2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (float_extend:V2DF
          (vec_select:V2SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")
                           (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "cvtps2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])
2025
 
2026
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2027
;;
2028
;; Parallel double-precision floating point element swizzling
2029
;;
2030
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2031
 
2032
;; Selects indices 1 and 3 of the concatenation of operands 1 and 2,
;; i.e. element 1 of each input.  Alternatives:
;;   0: register form, unpckhpd.
;;   1: operand 1 in offsettable memory, operand 2 tied to the
;;      destination; movlpd from %H1 (presumably the upper half of the
;;      memory operand -- confirm against the %H operand modifier).
;;   2: memory destination tied to operand 2; movhpd store of operand 1.
;; The condition rejects the case where both inputs are memory.
(define_insn "sse2_unpckhpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
            (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
          (parallel [(const_int 1) (const_int 3)])))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   unpckhpd\t{%2, %0|%0, %2}
   movlpd\t{%H1, %0|%0, %H1}
   movhpd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "mode" "V2DF,V1DF,V1DF")])
2047
 
2048
;; Duplicates element 0 of operand 1 into both elements of the result
;; (indices 0 and 2 of operand 1 concatenated with itself).  The
;; register-destination alternative emits movddup; the memory-destination
;; alternative outputs "#" and so must be split.
(define_insn "*sse3_movddup"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
            (match_dup 1))
          (parallel [(const_int 0) (const_int 2)])))]
  "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movddup\t{%1, %0|%0, %1}
   #"
  [(set_attr "type" "sselog1,ssemov")
   (set_attr "mode" "V2DF")])
2062
 
2063
;; After reload, a movddup-style duplication from a register into memory
;; becomes two DFmode stores of the register's low part, at byte
;; offsets 0 and 8 of the destination.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand" "")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "register_operand" "")
            (match_dup 1))
          (parallel [(const_int 0) (const_int 2)])))]
  "TARGET_SSE3 && reload_completed"
  [(const_int 0)]
{
  rtx src = gen_rtx_REG (DFmode, REGNO (operands[1]));
  int offset;

  /* Store the same scalar to the low half, then to the high half.  */
  for (offset = 0; offset <= 8; offset += 8)
    emit_move_insn (adjust_address (operands[0], DFmode, offset), src);
  DONE;
})
2079
 
2080
;; Selects indices 0 and 2 of the concatenation of operands 1 and 2,
;; i.e. element 0 of each input.  Operand 1 is always tied to the
;; destination.  Alternatives:
;;   0: register form, unpcklpd.
;;   1: operand 2 in memory; movhpd load.
;;   2: offsettable memory destination; movlpd store of operand 2 to %H0
;;      (presumably the upper half of the destination -- confirm against
;;      the %H operand modifier).
(define_insn "sse2_unpcklpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
            (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
          (parallel [(const_int 0) (const_int 2)])))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   unpcklpd\t{%2, %0|%0, %2}
   movhpd\t{%2, %0|%0, %2}
   movlpd\t{%2, %H0|%H0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "mode" "V2DF,V1DF,V1DF")])
2095
 
2096
;; Expander for shufpd with an immediate selector (operand 3).
;; Bit 0 of the selector becomes the first vec_select index (0 or 1) and
;; bit 1 becomes the second (2 or 3) for sse2_shufpd_1.
(define_expand "sse2_shufpd"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand:V2DF 1 "register_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int sel = INTVAL (operands[3]);
  rtx low_index = GEN_INT (sel & 1);
  rtx high_index = GEN_INT ((sel & 2) ? 3 : 2);

  emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
                                low_index, high_index));
  DONE;
})
2109
 
2110
;; shufpd expressed as a vec_select over the concatenation of operands 1
;; and 2.  Operand 3 (0..1) picks the result's element 0 from operand 1;
;; operand 4 (2..3) picks the result's element 1 from operand 2.  The
;; output code folds both indices back into the 2-bit immediate and
;; stores it in operands[3] for printing.
(define_insn "sse2_shufpd_1"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "register_operand" "0")
            (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (parallel [(match_operand 3 "const_0_to_1_operand" "")
                     (match_operand 4 "const_2_to_3_operand" "")])))]
  "TARGET_SSE2"
{
  /* Bit 0 comes from operand 3; bit 1 from operand 4 rebased to 0..1.  */
  int imm = INTVAL (operands[3]) | ((INTVAL (operands[4]) - 2) << 1);

  operands[3] = GEN_INT (imm);
  return "shufpd\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])
2129
 
2130
;; Extracts element 1 of V2DF operand 1 as a DF value.  Alternatives:
;;   0: store to memory with movhpd.
;;   1: register source tied to the destination; unpckhpd on itself.
;;   2: offsettable memory source; outputs "#" and so must be split.
(define_insn "sse2_storehpd"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
          (parallel [(const_int 1)])))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movhpd\t{%1, %0|%0, %1}
   unpckhpd\t%0, %0
   #"
  [(set_attr "type" "ssemov,sselog1,ssemov")
   (set_attr "mode" "V1DF,V2DF,DF")])
2142
 
2143
;; After reload, extracting element 1 of a V2DF in memory into a
;; register becomes a plain DFmode load from byte offset 8 of that
;; memory operand.
(define_split
  [(set (match_operand:DF 0 "register_operand" "")
        (vec_select:DF
          (match_operand:V2DF 1 "memory_operand" "")
          (parallel [(const_int 1)])))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
  "operands[1] = adjust_address (operands[1], DFmode, 8);")
2153
 
2154
;; Extracts element 0 of V2DF operand 1 as a DF value.  Alternative 0
;; stores to memory with movlpd; the two register-destination
;; alternatives output "#" and so must be split.
(define_insn "sse2_storelpd"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
          (parallel [(const_int 0)])))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movlpd\t{%1, %0|%0, %1}
   #
   #"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V1DF,DF,DF")])
2166
 
2167
;; After reload, extracting element 0 of a V2DF into a register becomes
;; a plain DFmode move.  A register source is re-expressed as the same
;; hard register in DFmode; any other source goes through gen_lowpart.
(define_split
  [(set (match_operand:DF 0 "register_operand" "")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" "")
          (parallel [(const_int 0)])))]
  "TARGET_SSE2 && reload_completed"
  [(const_int 0)]
{
  rtx src = (REG_P (operands[1])
             ? gen_rtx_REG (DFmode, REGNO (operands[1]))
             : gen_lowpart (DFmode, operands[1]));

  emit_move_insn (operands[0], src);
  DONE;
})
2183
 
2184
;; Builds a V2DF from element 0 of operand 1 (low) and DF operand 2
;; (high).  Alternatives:
;;   0: operand 2 in memory; movhpd load.
;;   1: operand 2 in a register; unpcklpd.
;;   2: operand 2 tied to the destination; shufpd with immediate 1.
;;   3: offsettable memory destination tied to operand 1; outputs "#"
;;      and so must be split.
(define_insn "sse2_loadhpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
        (vec_concat:V2DF
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
            (parallel [(const_int 0)]))
          (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movhpd\t{%2, %0|%0, %2}
   unpcklpd\t{%2, %0|%0, %2}
   shufpd\t{$1, %1, %0|%0, %1, 1}
   #"
  [(set_attr "type" "ssemov,sselog,sselog,other")
   (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
2199
 
2200
;; After reload, replacing element 1 of a V2DF in memory with a DF
;; register becomes a plain DFmode store to byte offset 8 of that
;; memory operand.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand" "")
        (vec_concat:V2DF
          (vec_select:DF (match_dup 0)
                         (parallel [(const_int 0)]))
          (match_operand:DF 1 "register_operand" "")))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
  "operands[0] = adjust_address (operands[0], DFmode, 8);")
2210
 
2211
;; Builds a V2DF from DF operand 2 (low) and element 1 of operand 1
;; (high).  Alternatives:
;;   0: operand 1 matches constraint "C", operand 2 in memory; movsd.
;;   1: operand 1 tied to the destination, operand 2 in memory; movlpd.
;;   2: operand 1 tied to the destination, operand 2 in a register; movsd.
;;   3: operand 2 tied to the destination; shufpd with immediate 2.
;;   4: operand 1 in offsettable memory; movhpd from %H1.
;;   5: memory destination tied to operand 1; outputs "#" and so must
;;      be split.
(define_insn "sse2_loadlpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
        (vec_concat:V2DF
          (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
          (vec_select:DF
            (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
            (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movsd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   movsd\t{%2, %0|%0, %2}
   shufpd\t{$2, %2, %0|%0, %2, 2}
   movhpd\t{%H1, %0|%0, %H1}
   #"
  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
   (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
2228
 
2229
;; After reload, replacing element 0 of a V2DF in memory with a DF
;; register becomes a plain DFmode store.  The pattern keeps element 1
;; of the memory operand and overwrites element 0, so the store must go
;; to byte offset 0.  Offset 8 would overwrite the high element that the
;; pattern says is preserved, and leave the low element stale.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand" "")
        (vec_concat:V2DF
          (match_operand:DF 1 "register_operand" "")
          (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[0] = adjust_address (operands[0], DFmode, 0);
})
2239
 
2240
;; Not sure these two are ever used, but it doesn't hurt to have
2241
;; them. -aoliva
2242
;; SSE1-only (no SSE2) extraction of element 1 of a V2DF, using
;; single-precision moves: movhps store, movhlps between registers, or
;; movlps from %H1 of an offsettable memory source.
(define_insn "*vec_extractv2df_1_sse"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
          (parallel [(const_int 1)])))]
  "!TARGET_SSE2 && TARGET_SSE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movhps\t{%1, %0|%0, %1}
   movhlps\t{%1, %0|%0, %1}
   movlps\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
2255
 
2256
;; SSE1-only (no SSE2) extraction of element 0 of a V2DF, using
;; single-precision moves: movlps store, movaps between registers, or
;; movlps load from memory.
(define_insn "*vec_extractv2df_0_sse"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
          (parallel [(const_int 0)])))]
  "!TARGET_SSE2 && TARGET_SSE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movlps\t{%1, %0|%0, %1}
   movaps\t{%1, %0|%0, %1}
   movlps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
2269
 
2270
;; Merges element 0 of operand 2 with the remaining element of
;; operand 1 (vec_merge mask 1).  In alternatives 0-2 operand 1 is tied
;; to the destination and operand 2 supplies the new low element; in
;; alternatives 3-5 operand 2 is tied to the destination and operand 1
;; supplies the high element.
(define_insn "sse2_movsd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
        (vec_merge:V2DF
          (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
          (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
          (const_int 1)))]
  "TARGET_SSE2"
  "@
   movsd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   shufpd\t{$2, %2, %0|%0, %2, 2}
   movhps\t{%H1, %0|%0, %H1}
   movhps\t{%1, %H0|%H0, %1}"
  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
   (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
2286
 
2287
;; SSE3 broadcast of a DF scalar (register or memory) to both elements
;; of a V2DF register, using movddup.
(define_insn "*vec_dupv2df_sse3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_duplicate:V2DF (match_operand:DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE3"
  "movddup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "DF")])
2295
 
2296
;; SSE2 broadcast of a DF scalar to both elements of a V2DF register.
;; The source must already be in the destination register; the
;; broadcast is done by unpcklpd of the register with itself.
(define_insn "*vec_dupv2df"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_duplicate:V2DF (match_operand:DF 1 "register_operand" "0")))]
  "TARGET_SSE2"
  "unpcklpd\t%0, %0"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "V4SF")])
2304
 
2305
;; SSE3: concatenation of a DF scalar with itself, i.e. the same
;; broadcast as vec_duplicate, emitted as movddup.
(define_insn "*vec_concatv2df_sse3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_concat:V2DF (match_operand:DF 1 "nonimmediate_operand" "xm")
                         (match_dup 1)))]
  "TARGET_SSE3"
  "movddup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "DF")])
2314
 
2315
;; Builds a V2DF from two DF scalars: operand 1 is the low element and
;; operand 2 the high element.  The first three alternatives use the "Y"
;; register class and double-precision instructions (unpcklpd, movhpd,
;; movsd); the last two use the "x" class and single-precision moves
;; (movlhps, movhps).
(define_insn "*vec_concatv2df"
  [(set (match_operand:V2DF 0 "register_operand" "=Y,Y,Y,x,x")
        (vec_concat:V2DF
          (match_operand:DF 1 "nonimmediate_operand" " 0,0,m,0,0")
          (match_operand:DF 2 "vector_move_operand" " Y,m,C,x,m")))]
  "TARGET_SSE"
  "@
   unpcklpd\t{%2, %0|%0, %2}
   movhpd\t{%2, %0|%0, %2}
   movsd\t{%1, %0|%0, %1}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
   (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
2329
 
2330
;; Sets element number operand 2 (a constant) of V2DF operand 0 to DF
;; operand 1.  Delegates to ix86_expand_vector_set.
(define_expand "vec_setv2df"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand:DF 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  rtx target = operands[0];
  rtx value = operands[1];

  ix86_expand_vector_set (false, target, value, INTVAL (operands[2]));
  DONE;
})
2340
 
2341
;; Extracts element number operand 2 (a constant) of V2DF operand 1 into
;; DF operand 0.  Delegates to ix86_expand_vector_extract.
(define_expand "vec_extractv2df"
  [(match_operand:DF 0 "register_operand" "")
   (match_operand:V2DF 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  rtx target = operands[0];
  rtx vector = operands[1];

  ix86_expand_vector_extract (false, target, vector, INTVAL (operands[2]));
  DONE;
})
2351
 
2352
;; Initializes V2DF operand 0 from the element list in operand 1.
;; Delegates to ix86_expand_vector_init.
(define_expand "vec_initv2df"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  rtx target = operands[0];
  rtx elements = operands[1];

  ix86_expand_vector_init (false, target, elements);
  DONE;
})
2360
 
2361
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2362
;;
2363
;; Parallel integral arithmetic
2364
;;
2365
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2366
 
2367
;; Integer vector negation for every mode in SSEMODEI, expanded as
;; (0 - operand 1).  The zero vector is forced into a register and
;; supplied as operand 2.  The <mode> / <MODE> iterator substitutions
;; are required: without them every instance would share one pattern
;; name and `mode' would be an undeclared identifier in the C fragment.
(define_expand "neg<mode>2"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (minus:SSEMODEI
          (match_dup 2)
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
2374
 
2375
;; Expander for integer vector addition in every SSEMODEI mode.  The
;; operands are first adjusted by ix86_fixup_binary_operands_no_copy.
;; <mode> names each instance and <MODE>mode is the C machine-mode
;; constant for it.
(define_expand "add<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
                       (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
2381
 
2382
;; Integer vector addition for every SSEMODEI mode.  The mnemonic is
;; "padd" plus the element-size suffix given by the ssevecsize mode
;; attribute (b/w/d/q).  Operand 1 is commutative with operand 2 ("%")
;; and tied to the destination.
(define_insn "*add<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (plus:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
  "padd<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
2391
 
2392
;; Signed saturating addition (ss_plus) for the SSEMODE12 modes
;; (V16QI, V8HI).  The mnemonic is "padds" plus the ssevecsize
;; element-size suffix.  Operand 1 is commutative with operand 2 ("%")
;; and tied to the destination.
(define_insn "sse2_ssadd<mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
        (ss_plus:SSEMODE12
          (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
  "padds<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
2401
 
2402
;; Unsigned saturating addition (us_plus) for the SSEMODE12 modes
;; (V16QI, V8HI).  The mnemonic is "paddus" plus the ssevecsize
;; element-size suffix.  Operand 1 is commutative with operand 2 ("%")
;; and tied to the destination.
(define_insn "sse2_usadd<mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
        (us_plus:SSEMODE12
          (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
  "paddus<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
2411
 
2412
;; Expander for integer vector subtraction in every SSEMODEI mode.  The
;; operands are first adjusted by ix86_fixup_binary_operands_no_copy.
;; <mode> names each instance and <MODE>mode is the C machine-mode
;; constant for it.
(define_expand "sub<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
                        (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
2418
 
2419
;; Integer vector subtraction for every SSEMODEI mode.  The mnemonic is
;; "psub" plus the element-size suffix given by the ssevecsize mode
;; attribute (b/w/d/q).  Operand 1 is tied to the destination.
(define_insn "*sub<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (minus:SSEMODEI
          (match_operand:SSEMODEI 1 "register_operand" "0")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "psub<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
2428
 
2429
;; Signed saturating subtraction (ss_minus) for the SSEMODE12 modes
;; (V16QI, V8HI).  The mnemonic is "psubs" plus the ssevecsize
;; element-size suffix.  Operand 1 is tied to the destination.
(define_insn "sse2_sssub<mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
        (ss_minus:SSEMODE12
          (match_operand:SSEMODE12 1 "register_operand" "0")
          (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "psubs<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
2438
 
2439
;; Unsigned saturating subtraction of byte/word vectors (psubusb, psubusw).
(define_insn "sse2_ussub<mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
        (us_minus:SSEMODE12
          (match_operand:SSEMODE12 1 "register_operand" "0")
          (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "psubus<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
2448
 
2449
;; V16QI multiply, synthesized from two V8HI multiplies: the bytes are
;; unpacked into words, multiplied as words, and the low byte of each word
;; product is gathered back into byte order with a tree of unpack insns.
(define_expand "mulv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "")
        (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
                    (match_operand:V16QI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  rtx t[12], op0;
  int i;

  for (i = 0; i < 12; ++i)
    t[i] = gen_reg_rtx (V16QImode);

  /* Unpack data such that we've got a source byte in each low byte of
     each word.  We don't care what goes into the high byte of each word.
     Rather than trying to get zero in there, most convenient is to let
     it be a copy of the low byte.  */
  emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
  emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
  emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
  emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));

  /* Multiply words.  The end-of-line annotations here give a picture of what
     the output of that instruction looks like.  Dot means don't care; the
     letters are the bytes of the result with A being the most significant.  */
  emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
                           gen_lowpart (V8HImode, t[0]),
                           gen_lowpart (V8HImode, t[1])));
  emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
                           gen_lowpart (V8HImode, t[2]),
                           gen_lowpart (V8HImode, t[3])));

  /* Extract the relevant bytes and merge them back together.  */
  emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4]));    /* ..AI..BJ..CK..DL */
  emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4]));    /* ..EM..FN..GO..HP */
  emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6]));    /* ....AEIM....BFJN */
  emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6]));    /* ....CGKO....DHLP */
  emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8]));   /* ........ACEGIKMO */
  emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8]));   /* ........BDFHJLNP */

  op0 = operands[0];
  emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10]));   /* ABCDEFGHIJKLMNOP */
  DONE;
})
2492
 
2493
;; Named expander for V8HI multiply (low 16 bits of each product).
;; The operands are legitimized in place before the template is emitted.
(define_expand "mulv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
                   (match_operand:V8HI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
2499
 
2500
;; pmullw: element-wise V8HI multiply keeping the low word of each product.
(define_insn "*mulv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
                   (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmullw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "mode" "TI")])
2508
 
2509
;; pmulhw: signed 16x16->32 multiply of each element pair, keeping only the
;; upper 16 bits of every product.
(define_insn "sse2_smulv8hi3_highpart"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (mult:V8SI
              (sign_extend:V8SI
                (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
              (sign_extend:V8SI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
            (const_int 16))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmulhw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "mode" "TI")])
2523
 
2524
;; pmulhuw: unsigned 16x16->32 multiply of each element pair, keeping only
;; the upper 16 bits of every product.
(define_insn "sse2_umulv8hi3_highpart"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (mult:V8SI
              (zero_extend:V8SI
                (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
              (zero_extend:V8SI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
            (const_int 16))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmulhuw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "mode" "TI")])
2538
 
2539
;; pmuludq: zero-extend elements 0 and 2 of each V4SI input to 64 bits and
;; multiply them, producing two full 64-bit products.
;; The operands are V4SI, so that is the mode handed to
;; ix86_binary_operator_ok.
(define_insn "sse2_umulv2siv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (mult:V2DI
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "%0")
              (parallel [(const_int 0) (const_int 2)])))
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
  "pmuludq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "mode" "TI")])
2554
 
2555
;; pmaddwd: multiply-and-add of signed words.  Result element i is
;;   op1[2i] * op2[2i] + op1[2i+1] * op2[2i+1]
;; with every product computed in 32 bits.
(define_insn "sse2_pmaddwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand" "%0")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)]))))
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 1)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 2)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)]))))))]
  "TARGET_SSE2"
  "pmaddwd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
2590
 
2591
;; V4SI multiply built from two pmuludq operations: one on elements 0/2,
;; one on elements 1/3 (after shifting them into the 0/2 slots).  The low
;; 32 bits of the four 64-bit products are then shuffled back into order.
(define_expand "mulv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
                   (match_operand:V4SI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  rtx t1, t2, t3, t4, t5, t6, thirtytwo;
  rtx op0, op1, op2;

  op0 = operands[0];
  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (V4SImode);
  t2 = gen_reg_rtx (V4SImode);
  t3 = gen_reg_rtx (V4SImode);
  t4 = gen_reg_rtx (V4SImode);
  t5 = gen_reg_rtx (V4SImode);
  t6 = gen_reg_rtx (V4SImode);
  thirtytwo = GEN_INT (32);

  /* Multiply elements 2 and 0.  */
  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2));

  /* Shift both input vectors down one element, so that elements 3 and 1
     are now in the slots for elements 2 and 0.  For K8, at least, this is
     faster than using a shuffle.  */
  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
                               gen_lowpart (TImode, op1), thirtytwo));
  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
                               gen_lowpart (TImode, op2), thirtytwo));

  /* Multiply elements 3 and 1.  */
  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3));

  /* Move the results in element 2 down to element 1; we don't care what
     goes in elements 2 and 3.  */
  emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
                                const0_rtx, const0_rtx));
  emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
                                const0_rtx, const0_rtx));

  /* Merge the parts back together.  */
  emit_insn (gen_sse2_punpckldq (op0, t5, t6));
  DONE;
})
2636
 
2637
;; V2DI multiply built from 32x32->64 pmuludq products.  Writing each
;; 64-bit element as hi * 2^32 + lo, the low 64 bits of the product are
;;   lo1 * lo2 + ((lo1 * hi2 + lo2 * hi1) << 32).
(define_expand "mulv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
                   (match_operand:V2DI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  rtx t1, t2, t3, t4, t5, t6, thirtytwo;
  rtx op0, op1, op2;

  op0 = operands[0];
  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (V2DImode);
  t2 = gen_reg_rtx (V2DImode);
  t3 = gen_reg_rtx (V2DImode);
  t4 = gen_reg_rtx (V2DImode);
  t5 = gen_reg_rtx (V2DImode);
  t6 = gen_reg_rtx (V2DImode);
  thirtytwo = GEN_INT (32);

  /* Multiply low parts.  */
  emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
                                     gen_lowpart (V4SImode, op2)));

  /* Shift input vectors right 32 bits so that the high part of each
     element lands in the low half, where the multiply reads it.  */
  emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
  emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));

  /* Multiply high parts by low parts.  */
  emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
                                     gen_lowpart (V4SImode, t3)));
  emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
                                     gen_lowpart (V4SImode, t2)));

  /* Shift them back.  */
  emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
  emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));

  /* Add the three parts together.  */
  emit_insn (gen_addv2di3 (t6, t1, t4));
  emit_insn (gen_addv2di3 (op0, t6, t5));
  DONE;
})
2680
 
2681
;; Signed dot product step: operand 0 = operand 3 + pmaddwd (operand 1,
;; operand 2), i.e. the V4SI accumulator plus the pairwise word products.
(define_expand "sdot_prodv8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "nonimmediate_operand" "")
   (match_operand:V8HI 2 "nonimmediate_operand" "")
   (match_operand:V4SI 3 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx t = gen_reg_rtx (V4SImode);
  emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
  emit_insn (gen_addv4si3 (operands[0], operands[3], t));
  DONE;
})
2693
 
2694
;; Unsigned dot product step: operand 0 = operand 3 plus the 64-bit
;; products of all four element pairs of operands 1 and 2, folded into two
;; V2DI lanes (elements 0/2 first, then elements 1/3).
(define_expand "udot_prodv4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")
   (match_operand:V2DI 3 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx t1, t2, t3, t4;

  /* Products of elements 0 and 2, added to the accumulator.  */
  t1 = gen_reg_rtx (V2DImode);
  emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
  emit_insn (gen_addv2di3 (t1, t1, operands[3]));

  /* Shift both inputs down by one 32-bit element so that elements 1 and 3
     sit in the slots the widening multiply reads.  */
  t2 = gen_reg_rtx (V4SImode);
  t3 = gen_reg_rtx (V4SImode);
  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
                               gen_lowpart (TImode, operands[1]),
                               GEN_INT (32)));
  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
                               gen_lowpart (TImode, operands[2]),
                               GEN_INT (32)));

  /* Products of elements 1 and 3.  */
  t4 = gen_reg_rtx (V2DImode);
  emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));

  emit_insn (gen_addv2di3 (operands[0], t1, t4));
  DONE;
})
2722
 
2723
;; Arithmetic right shift of word/dword elements (psraw, psrad).  The count
;; is either an immediate or held in an SSE register.
(define_insn "ashr<mode>3"
  [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
        (ashiftrt:SSEMODE24
          (match_operand:SSEMODE24 1 "register_operand" "0")
          (match_operand:TI 2 "nonmemory_operand" "xn")))]
  "TARGET_SSE2"
  "psra<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "mode" "TI")])
2732
 
2733
;; Logical right shift of word/dword/qword elements (psrlw, psrld, psrlq).
;; The count is either an immediate or held in an SSE register.
(define_insn "lshr<mode>3"
  [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
        (lshiftrt:SSEMODE248
          (match_operand:SSEMODE248 1 "register_operand" "0")
          (match_operand:TI 2 "nonmemory_operand" "xn")))]
  "TARGET_SSE2"
  "psrl<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "mode" "TI")])
2742
 
2743
;; Left shift of word/dword/qword elements (psllw, pslld, psllq).
;; The count is either an immediate or held in an SSE register.
(define_insn "ashl<mode>3"
  [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
        (ashift:SSEMODE248
          (match_operand:SSEMODE248 1 "register_operand" "0")
          (match_operand:TI 2 "nonmemory_operand" "xn")))]
  "TARGET_SSE2"
  "psll<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "mode" "TI")])
2752
 
2753
;; Whole-register left shift.  The RTL count is in bits and must be a
;; multiple of 8; pslldq takes a byte count, hence the division.
(define_insn "sse2_ashlti3"
  [(set (match_operand:TI 0 "register_operand" "=x")
        (ashift:TI (match_operand:TI 1 "register_operand" "0")
                   (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
  "TARGET_SSE2"
{
  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
  return "pslldq\t{%2, %0|%0, %2}";
}
  [(set_attr "type" "sseishft")
   (set_attr "mode" "TI")])
2764
 
2765
;; Whole-vector left shift for each SSEMODEI mode, performed as a TImode
;; shift.  Fails unless the count passes const_0_to_255_mul_8_operand.
(define_expand "vec_shl_<mode>"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
                   (match_operand:SI 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  if (!const_0_to_255_mul_8_operand (operands[2], SImode))
    FAIL;
  /* View both vectors as TImode so the template is a plain TImode shift.  */
  operands[0] = gen_lowpart (TImode, operands[0]);
  operands[1] = gen_lowpart (TImode, operands[1]);
})
2776
 
2777
;; Whole-register logical right shift.  The RTL count is in bits and must
;; be a multiple of 8; psrldq takes a byte count, hence the division.
(define_insn "sse2_lshrti3"
  [(set (match_operand:TI 0 "register_operand" "=x")
        (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
                     (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
  "TARGET_SSE2"
{
  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
  return "psrldq\t{%2, %0|%0, %2}";
}
  [(set_attr "type" "sseishft")
   (set_attr "mode" "TI")])
2788
 
2789
;; Whole-vector logical right shift for each SSEMODEI mode, performed as a
;; TImode shift.  Fails unless the count passes const_0_to_255_mul_8_operand.
(define_expand "vec_shr_<mode>"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
                     (match_operand:SI 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  if (!const_0_to_255_mul_8_operand (operands[2], SImode))
    FAIL;
  /* View both vectors as TImode so the template is a plain TImode shift.  */
  operands[0] = gen_lowpart (TImode, operands[0]);
  operands[1] = gen_lowpart (TImode, operands[1]);
})
2800
 
2801
;; Named expander for unsigned byte maximum; operands are legitimized in
;; place before the template is emitted.
(define_expand "umaxv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "")
        (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
                    (match_operand:V16QI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
2807
 
2808
;; pmaxub: element-wise unsigned byte maximum.
(define_insn "*umaxv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
                    (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
  "pmaxub\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
2816
 
2817
;; Named expander for signed word maximum; operands are legitimized in
;; place before the template is emitted.
(define_expand "smaxv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
                   (match_operand:V8HI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
2823
 
2824
;; pmaxsw: element-wise signed word maximum.
(define_insn "*smaxv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
                   (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
  "pmaxsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
2832
 
2833
;; Unsigned word maximum, synthesized as (op1 -us op2) + op2: the
;; saturating subtract yields op1 - op2 when op1 > op2 and zero otherwise,
;; so adding op2 back gives the larger of the two.
(define_expand "umaxv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "")
                       (match_operand:V8HI 2 "nonimmediate_operand" "")))
   (set (match_dup 3)
        (plus:V8HI (match_dup 0) (match_dup 2)))]
  "TARGET_SSE2"
{
  /* Operand 3 is the real destination; operand 0 receives the intermediate
     difference.  If the destination is also the second input, writing the
     difference into it would destroy that input before the add reads it,
     so route the difference through a fresh pseudo instead.  */
  operands[3] = operands[0];
  if (rtx_equal_p (operands[0], operands[2]))
    operands[0] = gen_reg_rtx (V8HImode);
})
2845
 
2846
;; Signed maximum for the SSEMODE14 modes (V16QI, V4SI), expanded as a
;; vector conditional:  op0 = (op1 > op2) ? op1 : op2.
(define_expand "smax<mode>3"
  [(set (match_operand:SSEMODE14 0 "register_operand" "")
        (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
                        (match_operand:SSEMODE14 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  rtx xops[6];
  bool ok;

  /* xops uses the vcond operand layout: 0 = destination, 1 = value if
     true, 2 = value if false, 3 = comparison, 4/5 = compared values.  */
  xops[0] = operands[0];
  xops[1] = operands[1];
  xops[2] = operands[2];
  xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
  xops[4] = operands[1];
  xops[5] = operands[2];
  ok = ix86_expand_int_vcond (xops);
  gcc_assert (ok);
  DONE;
})
2865
 
2866
;; Unsigned V4SI maximum, expanded as a vector conditional:
;;   op0 = (op1 >u op2) ? op1 : op2.
(define_expand "umaxv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
                   (match_operand:V4SI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  rtx xops[6];
  bool ok;

  /* xops uses the vcond operand layout: 0 = destination, 1 = value if
     true, 2 = value if false, 3 = comparison, 4/5 = compared values.  */
  xops[0] = operands[0];
  xops[1] = operands[1];
  xops[2] = operands[2];
  xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
  xops[4] = operands[1];
  xops[5] = operands[2];
  ok = ix86_expand_int_vcond (xops);
  gcc_assert (ok);
  DONE;
})
2885
 
2886
;; Named expander for unsigned byte minimum; operands are legitimized in
;; place before the template is emitted.
(define_expand "uminv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "")
        (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
                    (match_operand:V16QI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
2892
 
2893
;; pminub: element-wise unsigned byte minimum.
(define_insn "*uminv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
                    (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
  "pminub\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
2901
 
2902
;; Named expander for signed word minimum; operands are legitimized in
;; place before the template is emitted.
(define_expand "sminv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
                   (match_operand:V8HI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
2908
 
2909
;; pminsw: element-wise signed word minimum.
(define_insn "*sminv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
                   (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
  "pminsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
2917
 
2918
;; Signed minimum for the SSEMODE14 modes (V16QI, V4SI), expanded as a
;; vector conditional:  op0 = (op1 > op2) ? op2 : op1.
(define_expand "smin<mode>3"
  [(set (match_operand:SSEMODE14 0 "register_operand" "")
        (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
                        (match_operand:SSEMODE14 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  rtx xops[6];
  bool ok;

  /* Same comparison as the maximum, but with the true/false values
     (xops[1] / xops[2]) swapped so the smaller operand is selected.  */
  xops[0] = operands[0];
  xops[1] = operands[2];
  xops[2] = operands[1];
  xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
  xops[4] = operands[1];
  xops[5] = operands[2];
  ok = ix86_expand_int_vcond (xops);
  gcc_assert (ok);
  DONE;
})
2937
 
2938
;; Unsigned minimum for the SSEMODE24 modes (V8HI, V4SI), expanded as a
;; vector conditional:  op0 = (op1 >u op2) ? op2 : op1.
(define_expand "umin<mode>3"
  [(set (match_operand:SSEMODE24 0 "register_operand" "")
        (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
                        (match_operand:SSEMODE24 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  rtx xops[6];
  bool ok;

  /* Unsigned greater-than comparison with the true/false values
     (xops[1] / xops[2]) swapped so the smaller operand is selected.  */
  xops[0] = operands[0];
  xops[1] = operands[2];
  xops[2] = operands[1];
  xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
  xops[4] = operands[1];
  xops[5] = operands[2];
  ok = ix86_expand_int_vcond (xops);
  gcc_assert (ok);
  DONE;
})
2957
 
2958
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2959
;;
2960
;; Parallel integral comparisons
2961
;;
2962
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2963
 
2964
;; Element-wise equality compare of byte/word/dword vectors
;; (pcmpeqb, pcmpeqw, pcmpeqd).  Commutative in operands 1 and 2.
(define_insn "sse2_eq<mode>3"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (eq:SSEMODE124
          (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
  "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "TI")])
2973
 
2974
;; Element-wise signed greater-than compare of byte/word/dword vectors
;; (pcmpgtb, pcmpgtw, pcmpgtd).  Not commutative: operand 1 is the
;; register tied to the destination.
(define_insn "sse2_gt<mode>3"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (gt:SSEMODE124
          (match_operand:SSEMODE124 1 "register_operand" "0")
          (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "TI")])
2983
 
2984
;; Vector conditional select for byte/word/dword vectors:
;;   op0 = (op4 <cmp3> op5) ? op1 : op2, element by element.
;; All the work is done by ix86_expand_int_vcond; the expansion fails if
;; that helper returns false.
(define_expand "vcond<mode>"
  [(set (match_operand:SSEMODE124 0 "register_operand" "")
        (if_then_else:SSEMODE124
          (match_operator 3 ""
            [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
             (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
          (match_operand:SSEMODE124 1 "general_operand" "")
          (match_operand:SSEMODE124 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  if (ix86_expand_int_vcond (operands))
    DONE;
  else
    FAIL;
})
2999
 
3000
;; Vector conditional select under the unsigned-comparison pattern name.
;; The template and the helper are the same as for the signed variant:
;;   op0 = (op4 <cmp3> op5) ? op1 : op2, element by element.
;; The expansion fails if ix86_expand_int_vcond returns false.
(define_expand "vcondu<mode>"
  [(set (match_operand:SSEMODE124 0 "register_operand" "")
        (if_then_else:SSEMODE124
          (match_operator 3 ""
            [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
             (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
          (match_operand:SSEMODE124 1 "general_operand" "")
          (match_operand:SSEMODE124 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  if (ix86_expand_int_vcond (operands))
    DONE;
  else
    FAIL;
})
3015
 
3016
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3017
;;
3018
;; Parallel integral logical operations
3019
;;
3020
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3021
 
3022
;; Bitwise complement, implemented as XOR with an all-ones vector that is
;; built here and forced into a register as operand 2.
(define_expand "one_cmpl<mode>2"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
                      (match_dup 2)))]
  "TARGET_SSE2"
{
  int i, n = GET_MODE_NUNITS (<MODE>mode);
  rtvec v = rtvec_alloc (n);

  /* Every element is -1, i.e. all bits set.  */
  for (i = 0; i < n; ++i)
    RTVEC_ELT (v, i) = constm1_rtx;

  operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
})
3036
 
3037
;; Named expander for bitwise AND on each SSEMODEI mode; operands are
;; legitimized in place before the template is emitted.
(define_expand "and<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
                      (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
3043
 
3044
;; pand: 128-bit bitwise AND; the same instruction serves every SSEMODEI
;; mode.
(define_insn "*and<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (and:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
  "pand\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])
3053
 
3054
;; pandn: (~operand 1) & operand 2.  Only the first operand is inverted,
;; so the pattern is not commutative and operand 1 is tied to the
;; destination register.
(define_insn "sse2_nand<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (and:SSEMODEI
          (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "pandn\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])
3063
 
3064
;; Named expander for bitwise inclusive OR on each SSEMODEI mode; operands
;; are legitimized in place before the template is emitted.
(define_expand "ior<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
                      (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
3070
 
3071
;; por: 128-bit bitwise inclusive OR; the same instruction serves every
;; SSEMODEI mode.
(define_insn "*ior<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (ior:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
  "por\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])
3080
 
3081
;; Named expander for bitwise exclusive OR on each SSEMODEI mode; operands
;; are legitimized in place before the template is emitted.
(define_expand "xor<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
                      (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
3087
 
3088
;; pxor: 128-bit bitwise exclusive OR; the same instruction serves every
;; SSEMODEI mode.
(define_insn "*xor<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (xor:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
  "pxor\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])
3097
 
3098
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3099
;;
3100
;; Parallel integral element swizzling
3101
;;
3102
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3103
 
3104
;; packsswb: narrow two V8HI vectors to bytes with signed saturation.
;; Operand 1 supplies the first eight result elements, operand 2 the rest.
(define_insn "sse2_packsswb"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_concat:V16QI
          (ss_truncate:V8QI
            (match_operand:V8HI 1 "register_operand" "0"))
          (ss_truncate:V8QI
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
  "TARGET_SSE2"
  "packsswb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])
3115
 
3116
;; packssdw: narrow two V4SI vectors to words with signed saturation.
;; Operand 1 supplies the first four result elements, operand 2 the rest.
(define_insn "sse2_packssdw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          (ss_truncate:V4HI
            (match_operand:V4SI 1 "register_operand" "0"))
          (ss_truncate:V4HI
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
  "TARGET_SSE2"
  "packssdw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])
3127
 
3128
;; packuswb: narrow two V8HI vectors to bytes with unsigned saturation.
;; Operand 1 supplies the first eight result elements, operand 2 the rest.
(define_insn "sse2_packuswb"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_concat:V16QI
          (us_truncate:V8QI
            (match_operand:V8HI 1 "register_operand" "0"))
          (us_truncate:V8QI
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
  "TARGET_SSE2"
  "packuswb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])
3139
 
3140
;; punpckhbw: interleave the upper eight bytes of operands 1 and 2.
;; In the selector, indices 0-15 address operand 1 and 16-31 operand 2.
(define_insn "sse2_punpckhbw"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_select:V16QI
          (vec_concat:V32QI
            (match_operand:V16QI 1 "register_operand" "0")
            (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 8)  (const_int 24)
                     (const_int 9)  (const_int 25)
                     (const_int 10) (const_int 26)
                     (const_int 11) (const_int 27)
                     (const_int 12) (const_int 28)
                     (const_int 13) (const_int 29)
                     (const_int 14) (const_int 30)
                     (const_int 15) (const_int 31)])))]
  "TARGET_SSE2"
  "punpckhbw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])
3158
 
3159
;; punpcklbw: interleave the lower eight bytes of operands 1 and 2.
;; In the selector, indices 0-15 address operand 1 and 16-31 operand 2.
(define_insn "sse2_punpcklbw"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_select:V16QI
          (vec_concat:V32QI
            (match_operand:V16QI 1 "register_operand" "0")
            (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 16)
                     (const_int 1) (const_int 17)
                     (const_int 2) (const_int 18)
                     (const_int 3) (const_int 19)
                     (const_int 4) (const_int 20)
                     (const_int 5) (const_int 21)
                     (const_int 6) (const_int 22)
                     (const_int 7) (const_int 23)])))]
  "TARGET_SSE2"
  "punpcklbw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])
3177
 
3178
;; punpckhwd: interleave the upper four words of operands 1 and 2.
;; In the selector, indices 0-7 address operand 1 and 8-15 operand 2.
(define_insn "sse2_punpckhwd"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (vec_concat:V16HI
            (match_operand:V8HI 1 "register_operand" "0")
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 4) (const_int 12)
                     (const_int 5) (const_int 13)
                     (const_int 6) (const_int 14)
                     (const_int 7) (const_int 15)])))]
  "TARGET_SSE2"
  "punpckhwd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])
3192
 
3193
;; punpcklwd: interleave the lower four words of operands 1 and 2.
;; In the selector, indices 0-7 address operand 1 and 8-15 operand 2.
(define_insn "sse2_punpcklwd"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (vec_concat:V16HI
            (match_operand:V8HI 1 "register_operand" "0")
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 8)
                     (const_int 1) (const_int 9)
                     (const_int 2) (const_int 10)
                     (const_int 3) (const_int 11)])))]
  "TARGET_SSE2"
  "punpcklwd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])
3207
 
3208
;; punpckhdq: interleave the upper two dwords of operands 1 and 2.
;; In the selector, indices 0-3 address operand 1 and 4-7 operand 2.
(define_insn "sse2_punpckhdq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_select:V4SI
          (vec_concat:V8SI
            (match_operand:V4SI 1 "register_operand" "0")
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 2) (const_int 6)
                     (const_int 3) (const_int 7)])))]
  "TARGET_SSE2"
  "punpckhdq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])
3220
 
3221
;; punpckldq: interleave the lower two 32-bit elements of operands 1 and 2.
;; The vec_select picks elements 0,4,1,5 of the 8-element concatenation
;; (operand 1 supplies indices 0-3, operand 2 indices 4-7).
;; Operand 1 is tied to the destination; operand 2 may be register or memory.
(define_insn "sse2_punpckldq"
3222
  [(set (match_operand:V4SI 0 "register_operand" "=x")
3223
        (vec_select:V4SI
3224
          (vec_concat:V8SI
3225
            (match_operand:V4SI 1 "register_operand" "0")
3226
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3227
          (parallel [(const_int 0) (const_int 4)
3228
                     (const_int 1) (const_int 5)])))]
3229
  "TARGET_SSE2"
3230
  "punpckldq\t{%2, %0|%0, %2}"
3231
  [(set_attr "type" "sselog")
3232
   (set_attr "mode" "TI")])
3233
 
3234
;; punpckhqdq: build a V2DI from the upper 64-bit element of operand 1
;; (index 1) followed by the upper 64-bit element of operand 2 (index 3 of
;; the 4-element concatenation).  Operand 1 is tied to the destination.
(define_insn "sse2_punpckhqdq"
3235
  [(set (match_operand:V2DI 0 "register_operand" "=x")
3236
        (vec_select:V2DI
3237
          (vec_concat:V4DI
3238
            (match_operand:V2DI 1 "register_operand" "0")
3239
            (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3240
          (parallel [(const_int 1)
3241
                     (const_int 3)])))]
3242
  "TARGET_SSE2"
3243
  "punpckhqdq\t{%2, %0|%0, %2}"
3244
  [(set_attr "type" "sselog")
3245
   (set_attr "mode" "TI")])
3246
 
3247
;; punpcklqdq: build a V2DI from the lower 64-bit element of operand 1
;; (index 0) followed by the lower 64-bit element of operand 2 (index 2 of
;; the 4-element concatenation).  Operand 1 is tied to the destination.
(define_insn "sse2_punpcklqdq"
3248
  [(set (match_operand:V2DI 0 "register_operand" "=x")
3249
        (vec_select:V2DI
3250
          (vec_concat:V4DI
3251
            (match_operand:V2DI 1 "register_operand" "0")
3252
            (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3253
          (parallel [(const_int 0)
3254
                     (const_int 2)])))]
3255
  "TARGET_SSE2"
3256
  "punpcklqdq\t{%2, %0|%0, %2}"
3257
  [(set_attr "type" "sselog")
3258
   (set_attr "mode" "TI")])
3259
 
3260
;; Expander for inserting a 16-bit value into one element of a V8HI vector.
;; Operands: 0 = result, 1 = source vector, 2 = SImode value to insert,
;; 3 = element index (constant 0..7).
;; The preparation code narrows operand 2 to HImode and rewrites the index
;; as the one-hot mask (1 << index) used by the vec_merge, so the generated
;; RTL has the shape of the "*sse2_pinsrw" insn (HImode value, power-of-two
;; mask).
(define_expand "sse2_pinsrw"
3261
  [(set (match_operand:V8HI 0 "register_operand" "")
3262
        (vec_merge:V8HI
3263
          (vec_duplicate:V8HI
3264
            (match_operand:SI 2 "nonimmediate_operand" ""))
3265
          (match_operand:V8HI 1 "register_operand" "")
3266
          (match_operand:SI 3 "const_0_to_7_operand" "")))]
3267
  "TARGET_SSE2"
3268
{
3269
  operands[2] = gen_lowpart (HImode, operands[2]);
3270
  operands[3] = GEN_INT ((1 << INTVAL (operands[3])));
3271
})
3272
 
3273
;; pinsrw: merge the HImode operand 2 into the element of operand 1 chosen
;; by the one-hot mask in operand 3 (a power of two from 1 to 128).
;; The output code converts the mask back into an element index with
;; exact_log2 before printing it as the instruction's immediate.  Operand 2
;; is printed with the "k" modifier.
(define_insn "*sse2_pinsrw"
3274
  [(set (match_operand:V8HI 0 "register_operand" "=x")
3275
        (vec_merge:V8HI
3276
          (vec_duplicate:V8HI
3277
            (match_operand:HI 2 "nonimmediate_operand" "rm"))
3278
          (match_operand:V8HI 1 "register_operand" "0")
3279
          (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
3280
  "TARGET_SSE2"
3281
{
3282
  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
3283
  return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
3284
}
3285
  [(set_attr "type" "sselog")
3286
   (set_attr "mode" "TI")])
3287
 
3288
;; pextrw: select the 16-bit element of operand 1 at the constant index in
;; operand 2 (0..7) and zero-extend it into the SImode general register
;; operand 0.
(define_insn "sse2_pextrw"
3289
  [(set (match_operand:SI 0 "register_operand" "=r")
3290
        (zero_extend:SI
3291
          (vec_select:HI
3292
            (match_operand:V8HI 1 "register_operand" "x")
3293
            (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
3294
  "TARGET_SSE2"
3295
  "pextrw\t{%2, %1, %0|%0, %1, %2}"
3296
  [(set_attr "type" "sselog")
3297
   (set_attr "mode" "TI")])
3298
 
3299
;; Expander for pshufd taking the packed 8-bit shuffle immediate.
;; Operands: 0 = V4SI result, 1 = V4SI source, 2 = constant immediate.
;; The immediate is unpacked into four 2-bit element selectors (lowest
;; field first) and handed to sse2_pshufd_1, which carries them as
;; separate operands of a vec_select.
(define_expand "sse2_pshufd"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V4SI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[4];
  int i;

  /* Field I of the immediate occupies bits 2*I and 2*I+1.  */
  for (i = 0; i < 4; i++)
    sel[i] = GEN_INT ((imm >> (2 * i)) & 3);

  emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
                                sel[0], sel[1], sel[2], sel[3]));
  DONE;
})
3313
 
3314
;; pshufd with the shuffle expressed as a vec_select: operands 2..5 are the
;; source element indices (each 0..3) for result elements 0..3.
;; The output code repacks the four selectors into the 8-bit immediate,
;; two bits per selector with operand 2 in the lowest bits, and stores it
;; back into operands[2] for printing.
(define_insn "sse2_pshufd_1"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")])))]
  "TARGET_SSE2"
{
  int imm = 0;
  int i;

  /* Selector I (operands[2 + I]) lands in bits 2*I and 2*I+1.  */
  for (i = 0; i < 4; i++)
    imm |= INTVAL (operands[2 + i]) << (2 * i);
  operands[2] = GEN_INT (imm);

  return "pshufd\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])
3335
 
3336
;; Expander for pshuflw taking the packed 8-bit shuffle immediate.
;; Operands: 0 = V8HI result, 1 = V8HI source, 2 = constant immediate.
;; The immediate is unpacked into four 2-bit selectors (lowest field
;; first), which sse2_pshuflw_1 uses as the indices of result elements
;; 0..3.
(define_expand "sse2_pshuflw"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V8HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[4];
  int i;

  /* Field I of the immediate occupies bits 2*I and 2*I+1.  */
  for (i = 0; i < 4; i++)
    sel[i] = GEN_INT ((imm >> (2 * i)) & 3);

  emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
                                 sel[0], sel[1], sel[2], sel[3]));
  DONE;
})
3350
 
3351
;; pshuflw as a vec_select: result elements 0..3 are chosen by operands
;; 2..5 (each 0..3); result elements 4..7 are source elements 4..7
;; unchanged.  The output code repacks the four selectors into the 8-bit
;; immediate, two bits each with operand 2 lowest, and stores it back into
;; operands[2] for printing.
(define_insn "sse2_pshuflw_1"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")
                     (const_int 4)
                     (const_int 5)
                     (const_int 6)
                     (const_int 7)])))]
  "TARGET_SSE2"
{
  int imm = 0;
  int i;

  /* Selector I (operands[2 + I]) lands in bits 2*I and 2*I+1.  */
  for (i = 0; i < 4; i++)
    imm |= INTVAL (operands[2 + i]) << (2 * i);
  operands[2] = GEN_INT (imm);

  return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])
3376
 
3377
;; Expander for pshufhw taking the packed 8-bit shuffle immediate.
;; Operands: 0 = V8HI result, 1 = V8HI source, 2 = constant immediate.
;; Each 2-bit field of the immediate (lowest field first) is biased by 4
;; so that it becomes an element index in the range 4..7, as required by
;; the operands of sse2_pshufhw_1.
(define_expand "sse2_pshufhw"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V8HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[4];
  int i;

  /* Field I of the immediate occupies bits 2*I and 2*I+1; add 4 to turn
     it into an index into the upper half of the vector.  */
  for (i = 0; i < 4; i++)
    sel[i] = GEN_INT (((imm >> (2 * i)) & 3) + 4);

  emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
                                 sel[0], sel[1], sel[2], sel[3]));
  DONE;
})
3391
 
3392
;; pshufhw as a vec_select: result elements 0..3 are source elements 0..3
;; unchanged; result elements 4..7 are chosen by operands 2..5 (each 4..7).
;; The output code removes the bias of 4 from every selector and repacks
;; them into the 8-bit immediate, two bits each with operand 2 lowest,
;; storing the result back into operands[2] for printing.
(define_insn "sse2_pshufhw_1"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "xm")
          (parallel [(const_int 0)
                     (const_int 1)
                     (const_int 2)
                     (const_int 3)
                     (match_operand 2 "const_4_to_7_operand" "")
                     (match_operand 3 "const_4_to_7_operand" "")
                     (match_operand 4 "const_4_to_7_operand" "")
                     (match_operand 5 "const_4_to_7_operand" "")])))]
  "TARGET_SSE2"
{
  int imm = 0;
  int i;

  /* Selector I (operands[2 + I], range 4..7) lands in bits 2*I and
     2*I+1 after the bias of 4 is removed.  */
  for (i = 0; i < 4; i++)
    imm |= (INTVAL (operands[2 + i]) - 4) << (2 * i);
  operands[2] = GEN_INT (imm);

  return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])
3417
 
3418
;; Expander that loads an SImode value into element 0 of a V4SI register.
;; The vec_merge mask (const_int 1) takes element 0 from the duplicated
;; operand 1 and the remaining elements from operand 2, which the
;; preparation statement sets to the V4SI zero vector.
;; NOTE(review): named sse2_* but conditioned on TARGET_SSE - confirm
;; this is intentional.
(define_expand "sse2_loadd"
3419
  [(set (match_operand:V4SI 0 "register_operand" "")
3420
        (vec_merge:V4SI
3421
          (vec_duplicate:V4SI
3422
            (match_operand:SI 1 "nonimmediate_operand" ""))
3423
          (match_dup 2)
3424
          (const_int 1)))]
3425
  "TARGET_SSE"
3426
  "operands[2] = CONST0_RTX (V4SImode);")
3427
 
3428
;; Insert the SImode operand 2 into element 0 of a V4SI; the other elements
;; come from operand 1, which is either zero ("C") or tied to the
;; destination ("0").  Alternatives:
;;   0: movd  from memory or a general register, remaining elements zero;
;;   1: movss from memory, remaining elements zero;
;;   2: movss from an SSE register, remaining elements kept from operand 1.
(define_insn "sse2_loadld"
3429
  [(set (match_operand:V4SI 0 "register_operand"       "=Y,x,x")
3430
        (vec_merge:V4SI
3431
          (vec_duplicate:V4SI
3432
            (match_operand:SI 2 "nonimmediate_operand" "mr,m,x"))
3433
          (match_operand:V4SI 1 "reg_or_0_operand"     " C,C,0")
3434
          (const_int 1)))]
3435
  "TARGET_SSE"
3436
  "@
3437
   movd\t{%2, %0|%0, %2}
3438
   movss\t{%2, %0|%0, %2}
3439
   movss\t{%2, %0|%0, %2}"
3440
  [(set_attr "type" "ssemov")
3441
   (set_attr "mode" "TI,V4SF,SF")])
3442
 
3443
;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3444
;; be taken into account, and movdi isn't fully populated even without.
3445
;; Store element 0 of a V4SI register into an SImode destination (memory
;; or SSE register).  The insn itself emits "#", i.e. it is always split:
;; once reload has completed it becomes a plain SImode move whose source is
;; the same hard register as operand 1, re-expressed in SImode.
(define_insn_and_split "sse2_stored"
3446
  [(set (match_operand:SI 0 "nonimmediate_operand" "=mx")
3447
        (vec_select:SI
3448
          (match_operand:V4SI 1 "register_operand" "x")
3449
          (parallel [(const_int 0)])))]
3450
  "TARGET_SSE"
3451
  "#"
3452
  "&& reload_completed"
3453
  [(set (match_dup 0) (match_dup 1))]
3454
{
3455
  operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
3456
})
3457
 
3458
;; Expander that stores element 0 of a V2DI register into a DImode
;; destination.  There is no preparation code; it simply generates the
;; vec_select set shown in the template.
(define_expand "sse_storeq"
3459
  [(set (match_operand:DI 0 "nonimmediate_operand" "")
3460
        (vec_select:DI
3461
          (match_operand:V2DI 1 "register_operand" "")
3462
          (parallel [(const_int 0)])))]
3463
  "TARGET_SSE"
3464
  "")
3465
 
3466
;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3467
;; be taken into account, and movdi isn't fully populated even without.
3468
;; Matches the extraction of element 0 of a V2DI register into a DImode
;; memory or SSE-register destination.  It emits "#", so it must be split
;; before final output.
(define_insn "*sse2_storeq"
3469
  [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
3470
        (vec_select:DI
3471
          (match_operand:V2DI 1 "register_operand" "x")
3472
          (parallel [(const_int 0)])))]
3473
  "TARGET_SSE"
3474
  "#")
3475
 
3476
;; After reload, turn the extraction of element 0 of a V2DI register into a
;; plain DImode move: the source is rewritten as the same hard register in
;; DImode.
(define_split
3477
  [(set (match_operand:DI 0 "nonimmediate_operand" "")
3478
        (vec_select:DI
3479
          (match_operand:V2DI 1 "register_operand" "")
3480
          (parallel [(const_int 0)])))]
3481
  "TARGET_SSE && reload_completed"
3482
  [(set (match_dup 0) (match_dup 1))]
3483
{
3484
  operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
3485
})
3486
 
3487
;; Extract element 1 (the upper DImode element) of a V2DI when SSE2 is
;; available.  The condition rejects memory-to-memory forms.  Alternatives:
;;   0: movhps store of an SSE register to memory;
;;   1: psrldq by 8 bytes in place (source tied to the destination);
;;   2: movq from an offsettable memory source, printed with the "H"
;;      modifier (presumably addressing the upper 8 bytes - confirm against
;;      the operand printer).
(define_insn "*vec_extractv2di_1_sse2"
3488
  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
3489
        (vec_select:DI
3490
          (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
3491
          (parallel [(const_int 1)])))]
3492
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3493
  "@
3494
   movhps\t{%1, %0|%0, %1}
3495
   psrldq\t{$8, %0|%0, 8}
3496
   movq\t{%H1, %0|%0, %H1}"
3497
  [(set_attr "type" "ssemov,sseishft,ssemov")
3498
   (set_attr "memory" "*,none,*")
3499
   (set_attr "mode" "V2SF,TI,TI")])
3500
 
3501
;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
3502
;; Extract element 1 (the upper DImode element) of a V2DI when only SSE
;; (not SSE2) is available.  The condition rejects memory-to-memory forms.
;; Alternatives:
;;   0: movhps store of an SSE register to memory;
;;   1: movhlps between SSE registers;
;;   2: movlps from an offsettable memory source, printed with the "H"
;;      modifier (presumably addressing the upper 8 bytes - confirm against
;;      the operand printer).
(define_insn "*vec_extractv2di_1_sse"
3503
  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
3504
        (vec_select:DI
3505
          (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
3506
          (parallel [(const_int 1)])))]
3507
  "!TARGET_SSE2 && TARGET_SSE
3508
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3509
  "@
3510
   movhps\t{%1, %0|%0, %1}
3511
   movhlps\t{%1, %0|%0, %1}
3512
   movlps\t{%H1, %0|%0, %H1}"
3513
  [(set_attr "type" "ssemov")
3514
   (set_attr "mode" "V2SF,V4SF,V2SF")])
3515
 
3516
;; Broadcast an SImode value held in an SSE register to all four elements
;; of a V4SI.  Alternative 0 uses pshufd with immediate 0 and a free choice
;; of source register; alternative 1 uses shufps with immediate 0 and
;; requires the source to be the destination register.
(define_insn "*vec_dupv4si"
3517
  [(set (match_operand:V4SI 0 "register_operand" "=Y,x")
3518
        (vec_duplicate:V4SI
3519
          (match_operand:SI 1 "register_operand" " Y,0")))]
3520
  "TARGET_SSE"
3521
  "@
3522
   pshufd\t{$0, %1, %0|%0, %1, 0}
3523
   shufps\t{$0, %0, %0|%0, %0, 0}"
3524
  [(set_attr "type" "sselog1")
3525
   (set_attr "mode" "TI,V4SF")])
3526
 
3527
;; Broadcast a DImode value to both elements of a V2DI.  In both
;; alternatives the source is tied to the destination, so the templates
;; name only %0 and need no {att|intel} operand-order braces.
;; Alternative 0 emits punpcklqdq, alternative 1 emits movlhps.
(define_insn "*vec_dupv2di"
3528
  [(set (match_operand:V2DI 0 "register_operand" "=Y,x")
3529
        (vec_duplicate:V2DI
3530
          (match_operand:DI 1 "register_operand" " 0,0")))]
3531
  "TARGET_SSE"
3532
  "@
3533
   punpcklqdq\t%0, %0
3534
   movlhps\t%0, %0"
3535
  [(set_attr "type" "sselog1,ssemov")
3536
   (set_attr "mode" "TI,V4SF")])
3537
 
3538
;; ??? In theory we can match memory for the MMX alternative, but allowing
3539
;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3540
;; alternatives pretty much forces the MMX alternative to be chosen.
3541
;; Build a V2SI from two SImode values (operand 1 = element 0, operand 2 =
;; element 1) when SSE2 is available.  Alternatives:
;;   0: punpckldq of two "Y"-class registers, operand 1 tied to the result;
;;   1: movd from register/memory when operand 2 is zero;
;;   2: punpckldq of two MMX registers, operand 1 tied to the result;
;;   3: movd into an MMX register when operand 2 is zero.
(define_insn "*sse2_concatv2si"
3542
  [(set (match_operand:V2SI 0 "register_operand"     "=Y, Y,*y,*y")
3543
        (vec_concat:V2SI
3544
          (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
3545
          (match_operand:SI 2 "reg_or_0_operand"     " Y, C,*y, C")))]
3546
  "TARGET_SSE2"
3547
  "@
3548
   punpckldq\t{%2, %0|%0, %2}
3549
   movd\t{%1, %0|%0, %1}
3550
   punpckldq\t{%2, %0|%0, %2}
3551
   movd\t{%1, %0|%0, %1}"
3552
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3553
   (set_attr "mode" "TI,TI,DI,DI")])
3554
 
3555
;; Build a V2SI from two SImode values (operand 1 = element 0, operand 2 =
;; element 1) under TARGET_SSE.  Alternatives:
;;   0: unpcklps of two SSE registers, operand 1 tied to the result;
;;   1: movss from memory when operand 2 is zero;
;;   2: punpckldq of two MMX registers, operand 1 tied to the result;
;;   3: movd into an MMX register when operand 2 is zero.
(define_insn "*sse1_concatv2si"
3556
  [(set (match_operand:V2SI 0 "register_operand"     "=x,x,*y,*y")
3557
        (vec_concat:V2SI
3558
          (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
3559
          (match_operand:SI 2 "reg_or_0_operand"     " x,C,*y,C")))]
3560
  "TARGET_SSE"
3561
  "@
3562
   unpcklps\t{%2, %0|%0, %2}
3563
   movss\t{%1, %0|%0, %1}
3564
   punpckldq\t{%2, %0|%0, %2}
3565
   movd\t{%1, %0|%0, %1}"
3566
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3567
   (set_attr "mode" "V4SF,V4SF,DI,DI")])
3568
 
3569
;; Build a V4SI from two V2SI halves: operand 1 (tied to the destination)
;; is the low half and operand 2 the high half.  Alternatives:
;;   0: punpcklqdq with a "Y"-class register;
;;   1: movlhps with an SSE register;
;;   2: movhps with a memory operand.
(define_insn "*vec_concatv4si_1"
3570
  [(set (match_operand:V4SI 0 "register_operand"       "=Y,x,x")
3571
        (vec_concat:V4SI
3572
          (match_operand:V2SI 1 "register_operand"     " 0,0,0")
3573
          (match_operand:V2SI 2 "nonimmediate_operand" " Y,x,m")))]
3574
  "TARGET_SSE"
3575
  "@
3576
   punpcklqdq\t{%2, %0|%0, %2}
3577
   movlhps\t{%2, %0|%0, %2}
3578
   movhps\t{%2, %0|%0, %2}"
3579
  [(set_attr "type" "sselog,ssemov,ssemov")
3580
   (set_attr "mode" "TI,V4SF,V2SF")])
3581
 
3582
;; Build a V2DI from two DImode values (operand 1 = element 0, operand 2 =
;; element 1).  Alternatives:
;;   0: movq from memory, upper element zero;
;;   1: movq2dq from an MMX register, upper element zero;
;;   2: punpcklqdq, operand 1 tied to the result, operand 2 in a "Y" register;
;;   3: movlhps, operand 1 tied to the result, operand 2 in an SSE register;
;;   4: movhps,  operand 1 tied to the result, operand 2 in memory;
;;   5: movlps from memory into the low element, with operand 2 (the upper
;;      element) tied to the result.
(define_insn "*vec_concatv2di"
3583
  [(set (match_operand:V2DI 0 "register_operand"     "=Y,?Y,Y,x,x,x")
3584
        (vec_concat:V2DI
3585
          (match_operand:DI 1 "nonimmediate_operand" " m,*y,0,0,0,m")
3586
          (match_operand:DI 2 "vector_move_operand"  " C, C,Y,x,m,0")))]
3587
  "TARGET_SSE"
3588
  "@
3589
   movq\t{%1, %0|%0, %1}
3590
   movq2dq\t{%1, %0|%0, %1}
3591
   punpcklqdq\t{%2, %0|%0, %2}
3592
   movlhps\t{%2, %0|%0, %2}
3593
   movhps\t{%2, %0|%0, %2}
3594
   movlps\t{%1, %0|%0, %1}"
3595
  [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
3596
   (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
3597
 
3598
;; Expander: set one element of a V2DI vector.
;; Operands: 0 = vector register to modify, 1 = DImode value, 2 = constant
;; element index.  All work is delegated to ix86_expand_vector_set, called
;; with `false' as its first argument (its meaning is defined by that
;; helper, outside this file).
(define_expand "vec_setv2di"
3599
  [(match_operand:V2DI 0 "register_operand" "")
3600
   (match_operand:DI 1 "register_operand" "")
3601
   (match_operand 2 "const_int_operand" "")]
3602
  "TARGET_SSE"
3603
{
3604
  ix86_expand_vector_set (false, operands[0], operands[1],
3605
                          INTVAL (operands[2]));
3606
  DONE;
3607
})
3608
 
3609
;; Expander: extract one element of a V2DI vector.
;; Operands: 0 = DImode destination register, 1 = source vector register,
;; 2 = constant element index.  All work is delegated to
;; ix86_expand_vector_extract, called with `false' as its first argument
;; (its meaning is defined by that helper, outside this file).
(define_expand "vec_extractv2di"
3610
  [(match_operand:DI 0 "register_operand" "")
3611
   (match_operand:V2DI 1 "register_operand" "")
3612
   (match_operand 2 "const_int_operand" "")]
3613
  "TARGET_SSE"
3614
{
3615
  ix86_expand_vector_extract (false, operands[0], operands[1],
3616
                              INTVAL (operands[2]));
3617
  DONE;
3618
})
3619
 
3620
;; Expander: initialize a V2DI register from operand 1, which has no
;; predicate or mode restriction here.  All work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument
;; (its meaning is defined by that helper, outside this file).
(define_expand "vec_initv2di"
3621
  [(match_operand:V2DI 0 "register_operand" "")
3622
   (match_operand 1 "" "")]
3623
  "TARGET_SSE"
3624
{
3625
  ix86_expand_vector_init (false, operands[0], operands[1]);
3626
  DONE;
3627
})
3628
 
3629
;; Expander: set one element of a V4SI vector.
;; Operands: 0 = vector register to modify, 1 = SImode value, 2 = constant
;; element index.  All work is delegated to ix86_expand_vector_set, called
;; with `false' as its first argument (its meaning is defined by that
;; helper, outside this file).
(define_expand "vec_setv4si"
3630
  [(match_operand:V4SI 0 "register_operand" "")
3631
   (match_operand:SI 1 "register_operand" "")
3632
   (match_operand 2 "const_int_operand" "")]
3633
  "TARGET_SSE"
3634
{
3635
  ix86_expand_vector_set (false, operands[0], operands[1],
3636
                          INTVAL (operands[2]));
3637
  DONE;
3638
})
3639
 
3640
;; Expander: extract one element of a V4SI vector.
;; Operands: 0 = SImode destination register, 1 = source vector register,
;; 2 = constant element index.  All work is delegated to
;; ix86_expand_vector_extract, called with `false' as its first argument
;; (its meaning is defined by that helper, outside this file).
(define_expand "vec_extractv4si"
3641
  [(match_operand:SI 0 "register_operand" "")
3642
   (match_operand:V4SI 1 "register_operand" "")
3643
   (match_operand 2 "const_int_operand" "")]
3644
  "TARGET_SSE"
3645
{
3646
  ix86_expand_vector_extract (false, operands[0], operands[1],
3647
                              INTVAL (operands[2]));
3648
  DONE;
3649
})
3650
 
3651
;; Expander: initialize a V4SI register from operand 1, which has no
;; predicate or mode restriction here.  All work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument
;; (its meaning is defined by that helper, outside this file).
(define_expand "vec_initv4si"
3652
  [(match_operand:V4SI 0 "register_operand" "")
3653
   (match_operand 1 "" "")]
3654
  "TARGET_SSE"
3655
{
3656
  ix86_expand_vector_init (false, operands[0], operands[1]);
3657
  DONE;
3658
})
3659
 
3660
;; Expander: set one element of a V8HI vector.
;; Operands: 0 = vector register to modify, 1 = HImode value, 2 = constant
;; element index.  All work is delegated to ix86_expand_vector_set, called
;; with `false' as its first argument (its meaning is defined by that
;; helper, outside this file).
(define_expand "vec_setv8hi"
3661
  [(match_operand:V8HI 0 "register_operand" "")
3662
   (match_operand:HI 1 "register_operand" "")
3663
   (match_operand 2 "const_int_operand" "")]
3664
  "TARGET_SSE"
3665
{
3666
  ix86_expand_vector_set (false, operands[0], operands[1],
3667
                          INTVAL (operands[2]));
3668
  DONE;
3669
})
3670
 
3671
;; Expander: extract one element of a V8HI vector.
;; Operands: 0 = HImode destination register, 1 = source vector register,
;; 2 = constant element index.  All work is delegated to
;; ix86_expand_vector_extract, called with `false' as its first argument
;; (its meaning is defined by that helper, outside this file).
(define_expand "vec_extractv8hi"
3672
  [(match_operand:HI 0 "register_operand" "")
3673
   (match_operand:V8HI 1 "register_operand" "")
3674
   (match_operand 2 "const_int_operand" "")]
3675
  "TARGET_SSE"
3676
{
3677
  ix86_expand_vector_extract (false, operands[0], operands[1],
3678
                              INTVAL (operands[2]));
3679
  DONE;
3680
})
3681
 
3682
;; Expander: initialize a V8HI register from operand 1, which has no
;; predicate or mode restriction here.  All work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument
;; (its meaning is defined by that helper, outside this file).
(define_expand "vec_initv8hi"
3683
  [(match_operand:V8HI 0 "register_operand" "")
3684
   (match_operand 1 "" "")]
3685
  "TARGET_SSE"
3686
{
3687
  ix86_expand_vector_init (false, operands[0], operands[1]);
3688
  DONE;
3689
})
3690
 
3691
;; Expander: set one element of a V16QI vector.
;; Operands: 0 = vector register to modify, 1 = QImode value, 2 = constant
;; element index.  All work is delegated to ix86_expand_vector_set, called
;; with `false' as its first argument (its meaning is defined by that
;; helper, outside this file).
(define_expand "vec_setv16qi"
3692
  [(match_operand:V16QI 0 "register_operand" "")
3693
   (match_operand:QI 1 "register_operand" "")
3694
   (match_operand 2 "const_int_operand" "")]
3695
  "TARGET_SSE"
3696
{
3697
  ix86_expand_vector_set (false, operands[0], operands[1],
3698
                          INTVAL (operands[2]));
3699
  DONE;
3700
})
3701
 
3702
;; Expander: extract one element of a V16QI vector.
;; Operands: 0 = QImode destination register, 1 = source vector register,
;; 2 = constant element index.  All work is delegated to
;; ix86_expand_vector_extract, called with `false' as its first argument
;; (its meaning is defined by that helper, outside this file).
(define_expand "vec_extractv16qi"
3703
  [(match_operand:QI 0 "register_operand" "")
3704
   (match_operand:V16QI 1 "register_operand" "")
3705
   (match_operand 2 "const_int_operand" "")]
3706
  "TARGET_SSE"
3707
{
3708
  ix86_expand_vector_extract (false, operands[0], operands[1],
3709
                              INTVAL (operands[2]));
3710
  DONE;
3711
})
3712
 
3713
;; Expander: initialize a V16QI register from operand 1, which has no
;; predicate or mode restriction here.  All work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument
;; (its meaning is defined by that helper, outside this file).
(define_expand "vec_initv16qi"
3714
  [(match_operand:V16QI 0 "register_operand" "")
3715
   (match_operand 1 "" "")]
3716
  "TARGET_SSE"
3717
{
3718
  ix86_expand_vector_init (false, operands[0], operands[1]);
3719
  DONE;
3720
})
3721
 
3722
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3723
;;
3724
;; Miscellaneous
3725
;;
3726
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3727
 
3728
;; pavgb: unsigned rounding average of sixteen byte elements.  The RTL
;; zero-extends both inputs to 16-bit elements, adds them, adds a vector of
;; ones, shifts right by one and truncates back to bytes.  The operation is
;; marked commutative ("%") and additionally guarded by
;; ix86_binary_operator_ok.
;; NOTE(review): the rounding constant is written as const_vector:V16QI
;; although it is an operand of plus:V16HI - confirm whether V16HI was
;; intended.
(define_insn "sse2_uavgv16qi3"
3729
  [(set (match_operand:V16QI 0 "register_operand" "=x")
3730
        (truncate:V16QI
3731
          (lshiftrt:V16HI
3732
            (plus:V16HI
3733
              (plus:V16HI
3734
                (zero_extend:V16HI
3735
                  (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
3736
                (zero_extend:V16HI
3737
                  (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
3738
              (const_vector:V16QI [(const_int 1) (const_int 1)
3739
                                   (const_int 1) (const_int 1)
3740
                                   (const_int 1) (const_int 1)
3741
                                   (const_int 1) (const_int 1)
3742
                                   (const_int 1) (const_int 1)
3743
                                   (const_int 1) (const_int 1)
3744
                                   (const_int 1) (const_int 1)
3745
                                   (const_int 1) (const_int 1)]))
3746
            (const_int 1))))]
3747
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
3748
  "pavgb\t{%2, %0|%0, %2}"
3749
  [(set_attr "type" "sseiadd")
3750
   (set_attr "mode" "TI")])
3751
 
3752
;; pavgw: unsigned rounding average of eight 16-bit elements.  The RTL
;; zero-extends both inputs to 32-bit elements, adds them, adds a vector of
;; ones, shifts right by one and truncates back to 16 bits.  The operation
;; is marked commutative ("%") and additionally guarded by
;; ix86_binary_operator_ok.
;; NOTE(review): the rounding constant is written as const_vector:V8HI
;; although it is an operand of plus:V8SI - confirm whether V8SI was
;; intended.
(define_insn "sse2_uavgv8hi3"
3753
  [(set (match_operand:V8HI 0 "register_operand" "=x")
3754
        (truncate:V8HI
3755
          (lshiftrt:V8SI
3756
            (plus:V8SI
3757
              (plus:V8SI
3758
                (zero_extend:V8SI
3759
                  (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3760
                (zero_extend:V8SI
3761
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3762
              (const_vector:V8HI [(const_int 1) (const_int 1)
3763
                                  (const_int 1) (const_int 1)
3764
                                  (const_int 1) (const_int 1)
3765
                                  (const_int 1) (const_int 1)]))
3766
            (const_int 1))))]
3767
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
3768
  "pavgw\t{%2, %0|%0, %2}"
3769
  [(set_attr "type" "sseiadd")
3770
   (set_attr "mode" "TI")])
3771
 
3772
;; The correct representation for this is absolutely enormous, and
3773
;; surely not generally useful.
3774
;; psadbw, modelled as an opaque UNSPEC_PSADBW of two V16QI inputs that
;; yields a V2DI result.  Operand 1 is tied to the destination; operand 2
;; may be register or memory.
(define_insn "sse2_psadbw"
3775
  [(set (match_operand:V2DI 0 "register_operand" "=x")
3776
        (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
3777
                      (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
3778
                     UNSPEC_PSADBW))]
3779
  "TARGET_SSE2"
3780
  "psadbw\t{%2, %0|%0, %2}"
3781
  [(set_attr "type" "sseiadd")
3782
   (set_attr "mode" "TI")])
3783
 
3784
;; movmskps: produce an SImode value in a general register from a V4SF SSE
;; register.  The computation is opaque to the compiler (UNSPEC_MOVMSK).
(define_insn "sse_movmskps"
3785
  [(set (match_operand:SI 0 "register_operand" "=r")
3786
        (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
3787
                   UNSPEC_MOVMSK))]
3788
  "TARGET_SSE"
3789
  "movmskps\t{%1, %0|%0, %1}"
3790
  [(set_attr "type" "ssecvt")
3791
   (set_attr "mode" "V4SF")])
3792
 
3793
;; movmskpd: produce an SImode value in a general register from a V2DF SSE
;; register.  The computation is opaque to the compiler (UNSPEC_MOVMSK).
(define_insn "sse2_movmskpd"
3794
  [(set (match_operand:SI 0 "register_operand" "=r")
3795
        (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
3796
                   UNSPEC_MOVMSK))]
3797
  "TARGET_SSE2"
3798
  "movmskpd\t{%1, %0|%0, %1}"
3799
  [(set_attr "type" "ssecvt")
3800
   (set_attr "mode" "V2DF")])
3801
 
3802
;; pmovmskb: produce an SImode value in a general register from a V16QI SSE
;; register.  The computation is opaque to the compiler (UNSPEC_MOVMSK).
;; NOTE(review): the "mode" attribute is V2DF although the operand is
;; V16QI.  This may be deliberate (attributes defined elsewhere can depend
;; on "mode"); confirm before changing it.
(define_insn "sse2_pmovmskb"
3803
  [(set (match_operand:SI 0 "register_operand" "=r")
3804
        (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
3805
                   UNSPEC_MOVMSK))]
3806
  "TARGET_SSE2"
3807
  "pmovmskb\t{%1, %0|%0, %1}"
3808
  [(set_attr "type" "ssecvt")
3809
   (set_attr "mode" "V2DF")])
3810
 
3811
;; Expander for maskmovdqu.  Operand 0 is the V16QI memory destination;
;; operands 1 and 2 are V16QI registers.  The destination also appears as
;; an input of the UNSPEC_MASKMOV (match_dup 0), so the store is modelled
;; as depending on the previous memory contents.  There is no preparation
;; code.
(define_expand "sse2_maskmovdqu"
3812
  [(set (match_operand:V16QI 0 "memory_operand" "")
3813
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3814
                       (match_operand:V16QI 2 "register_operand" "x")
3815
                       (match_dup 0)]
3816
                      UNSPEC_MASKMOV))]
3817
  "TARGET_SSE2"
3818
  "")
3819
 
3820
;; 32-bit maskmovdqu.  The destination address is an SImode register
;; forced into the "D" register class; the instruction text names only
;; operands 1 and 2, so the address register is implicit.  The old memory
;; contents are listed as an input of the unspec.
(define_insn "*sse2_maskmovdqu"
3821
  [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
3822
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3823
                       (match_operand:V16QI 2 "register_operand" "x")
3824
                       (mem:V16QI (match_dup 0))]
3825
                      UNSPEC_MASKMOV))]
3826
  "TARGET_SSE2 && !TARGET_64BIT"
3827
  ;; @@@ check ordering of operands in intel/nonintel syntax
3828
  "maskmovdqu\t{%2, %1|%1, %2}"
3829
  [(set_attr "type" "ssecvt")
3830
   (set_attr "mode" "TI")])
3831
 
3832
;; 64-bit maskmovdqu.  Identical to the 32-bit pattern except that the
;; destination address register (class "D") is DImode and the condition
;; requires TARGET_64BIT.
(define_insn "*sse2_maskmovdqu_rex64"
3833
  [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
3834
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3835
                       (match_operand:V16QI 2 "register_operand" "x")
3836
                       (mem:V16QI (match_dup 0))]
3837
                      UNSPEC_MASKMOV))]
3838
  "TARGET_SSE2 && TARGET_64BIT"
3839
  ;; @@@ check ordering of operands in intel/nonintel syntax
3840
  "maskmovdqu\t{%2, %1|%1, %2}"
3841
  [(set_attr "type" "ssecvt")
3842
   (set_attr "mode" "TI")])
3843
 
3844
;; ldmxcsr: takes an SImode memory operand.  Modelled as an
;; unspec_volatile with no RTL destination; the "memory" attribute is
;; "load".
(define_insn "sse_ldmxcsr"
3845
  [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
3846
                    UNSPECV_LDMXCSR)]
3847
  "TARGET_SSE"
3848
  "ldmxcsr\t%0"
3849
  [(set_attr "type" "sse")
3850
   (set_attr "memory" "load")])
3851
 
3852
;; stmxcsr: writes an SImode memory operand.  The stored value is modelled
;; as an unspec_volatile with a dummy (const_int 0) input; the "memory"
;; attribute is "store".
(define_insn "sse_stmxcsr"
3853
  [(set (match_operand:SI 0 "memory_operand" "=m")
3854
        (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
3855
  "TARGET_SSE"
3856
  "stmxcsr\t%0"
3857
  [(set_attr "type" "sse")
3858
   (set_attr "memory" "store")])
3859
 
3860
;; Expander for sfence.  The fence is modelled as a BLKmode memory location
;; set to an UNSPEC_SFENCE of itself.  The preparation code creates that
;; location as a MEM whose address is a Pmode scratch and marks it
;; volatile.
(define_expand "sse_sfence"
3861
  [(set (match_dup 0)
3862
        (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3863
  "TARGET_SSE || TARGET_3DNOW_A"
3864
{
3865
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3866
  MEM_VOLATILE_P (operands[0]) = 1;
3867
})
3868
 
3869
;; Matches a BLKmode location set to an UNSPEC_SFENCE of itself (operand 0
;; has an empty predicate, so anything is accepted) and emits "sfence".
;; The "memory" attribute is "unknown".
(define_insn "*sse_sfence"
3870
  [(set (match_operand:BLK 0 "" "")
3871
        (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3872
  "TARGET_SSE || TARGET_3DNOW_A"
3873
  "sfence"
3874
  [(set_attr "type" "sse")
3875
   (set_attr "memory" "unknown")])
3876
 
3877
;; clflush: takes an address operand (predicate address_operand,
;; constraint "p") printed with the "a" modifier.  Modelled as an
;; unspec_volatile with no RTL destination; the "memory" attribute is
;; "unknown".
(define_insn "sse2_clflush"
3878
  [(unspec_volatile [(match_operand 0 "address_operand" "p")]
3879
                    UNSPECV_CLFLUSH)]
3880
  "TARGET_SSE2"
3881
  "clflush\t%a0"
3882
  [(set_attr "type" "sse")
3883
   (set_attr "memory" "unknown")])
3884
 
3885
;; Expander for mfence.  The fence is modelled as a BLKmode memory location
;; set to an UNSPEC_MFENCE of itself.  The preparation code creates that
;; location as a MEM whose address is a Pmode scratch and marks it
;; volatile.
(define_expand "sse2_mfence"
3886
  [(set (match_dup 0)
3887
        (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3888
  "TARGET_SSE2"
3889
{
3890
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3891
  MEM_VOLATILE_P (operands[0]) = 1;
3892
})
3893
 
3894
;; Matches a BLKmode location set to an UNSPEC_MFENCE of itself (operand 0
;; has an empty predicate, so anything is accepted) and emits "mfence".
;; The "memory" attribute is "unknown".
(define_insn "*sse2_mfence"
3895
  [(set (match_operand:BLK 0 "" "")
3896
        (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3897
  "TARGET_SSE2"
3898
  "mfence"
3899
  [(set_attr "type" "sse")
3900
   (set_attr "memory" "unknown")])
3901
 
3902
;; Expander for lfence.  The fence is modelled as a BLKmode memory location
;; set to an UNSPEC_LFENCE of itself.  The preparation code creates that
;; location as a MEM whose address is a Pmode scratch and marks it
;; volatile.
(define_expand "sse2_lfence"
3903
  [(set (match_dup 0)
3904
        (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3905
  "TARGET_SSE2"
3906
{
3907
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3908
  MEM_VOLATILE_P (operands[0]) = 1;
3909
})
3910
 
3911
;; Matches a BLKmode location set to an UNSPEC_LFENCE of itself (operand 0
;; has an empty predicate, so anything is accepted) and emits "lfence".
;; The "memory" attribute is "unknown".
(define_insn "*sse2_lfence"
3912
  [(set (match_operand:BLK 0 "" "")
3913
        (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3914
  "TARGET_SSE2"
3915
  "lfence"
3916
  [(set_attr "type" "sse")
3917
   (set_attr "memory" "unknown")])
3918
 
3919
;; mwait: operand 0 is constrained to the "a" register and operand 1 to the
;; "c" register, both SImode.  The instruction is printed without explicit
;; operands and its length is fixed at 3 bytes.  The same pattern serves
;; 32-bit and 64-bit targets (the condition is only TARGET_SSE3).
(define_insn "sse3_mwait"
3920
  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3921
                     (match_operand:SI 1 "register_operand" "c")]
3922
                    UNSPECV_MWAIT)]
3923
  "TARGET_SSE3"
3924
;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
3925
;; Since 32bit register operands are implicitly zero extended to 64bit,
3926
;; we only need to set up 32bit registers.
3927
  "mwait"
3928
  [(set_attr "length" "3")])
3929
 
3930
;; 32-bit monitor: operands 0, 1 and 2 are SImode registers constrained to
;; the "a", "c" and "d" register classes.  The operands are printed
;; explicitly and the length is fixed at 3 bytes.  Only valid when not
;; compiling for 64-bit.
(define_insn "sse3_monitor"
3931
  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3932
                     (match_operand:SI 1 "register_operand" "c")
3933
                     (match_operand:SI 2 "register_operand" "d")]
3934
                    UNSPECV_MONITOR)]
3935
  "TARGET_SSE3 && !TARGET_64BIT"
3936
  "monitor\t%0, %1, %2"
3937
  [(set_attr "length" "3")])
3938
 
3939
;; 64-bit monitor: operand 0 is a DImode register in class "a"; operands 1
;; and 2 stay SImode in classes "c" and "d".  The instruction is printed
;; without explicit operands and the length is fixed at 3 bytes.
(define_insn "sse3_monitor64"
3940
  [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
3941
                     (match_operand:SI 1 "register_operand" "c")
3942
                     (match_operand:SI 2 "register_operand" "d")]
3943
                    UNSPECV_MONITOR)]
3944
  "TARGET_SSE3 && TARGET_64BIT"
3945
;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
3946
;; RCX and RDX are used.  Since 32bit register operands are implicitly
3947
;; zero extended to 64bit, we only need to set up 32bit registers.
3948
  "monitor"
3949
  [(set_attr "length" "3")])

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.