OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-stable/] [gcc-4.5.1/] [gcc/] [config/] [i386/] [emmintrin.h] - Blame information for rev 826

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 282 jeremybenn
/* Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
2
   Free Software Foundation, Inc.
3
 
4
   This file is part of GCC.
5
 
6
   GCC is free software; you can redistribute it and/or modify
7
   it under the terms of the GNU General Public License as published by
8
   the Free Software Foundation; either version 3, or (at your option)
9
   any later version.
10
 
11
   GCC is distributed in the hope that it will be useful,
12
   but WITHOUT ANY WARRANTY; without even the implied warranty of
13
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
   GNU General Public License for more details.
15
 
16
   Under Section 7 of GPL version 3, you are granted additional
17
   permissions described in the GCC Runtime Library Exception, version
18
   3.1, as published by the Free Software Foundation.
19
 
20
   You should have received a copy of the GNU General Public License and
21
   a copy of the GCC Runtime Library Exception along with this program;
22
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23
   <http://www.gnu.org/licenses/>.  */
24
 
25
/* Implemented from the specification included in the Intel C++ Compiler
26
   User Guide and Reference, version 9.0.  */
27
 
28
#ifndef _EMMINTRIN_H_INCLUDED
29
#define _EMMINTRIN_H_INCLUDED
30
 
31
#ifndef __SSE2__
32
# error "SSE2 instruction set not enabled"
33
#else
34
 
35
/* We need definitions from the SSE header files.  */
36
#include <xmmintrin.h>
37
 
38
/* SSE2 internal vector types.  Each is a 16-byte GCC vector whose element
   type is the one named in its typedef (double, long long, int, short,
   char).  The intrinsics below cast their operands to these types before
   handing them to the __builtin_ia32_* functions.  */
39
typedef double __v2df __attribute__ ((__vector_size__ (16)));
40
typedef long long __v2di __attribute__ ((__vector_size__ (16)));
41
typedef int __v4si __attribute__ ((__vector_size__ (16)));
42
typedef short __v8hi __attribute__ ((__vector_size__ (16)));
43
typedef char __v16qi __attribute__ ((__vector_size__ (16)));
44
 
45
/* The Intel API is flexible enough that we must allow aliasing with other
46
   vector types, and their scalar components.  Hence both public 16-byte
   types carry __may_alias__: __m128i is declared with long long elements
   and __m128d with double elements.  */
47
typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
48
typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
49
 
50
/* Build the two-bit selector used with the SHUFPD instruction:
   FP1 supplies bit 1 and FP0 supplies bit 0.  */
#define _MM_SHUFFLE2(fp1, fp0) ((fp0) | ((fp1) << 1))
53
 
54
/* Create a vector with element 0 as F and the rest zero.  */
55
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
56
_mm_set_sd (double __F)
57
{
58
  return __extension__ (__m128d){ __F, 0.0 };
59
}
60
 
61
/* Create a vector with both elements equal to F.  */
62
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
63
_mm_set1_pd (double __F)
64
{
65
  return __extension__ (__m128d){ __F, __F };
66
}
67
 
68
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
69
_mm_set_pd1 (double __F)
70
{
71
  return _mm_set1_pd (__F);
72
}
73
 
74
/* Create a vector with the lower value X and upper value W.  */
75
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
76
_mm_set_pd (double __W, double __X)
77
{
78
  return __extension__ (__m128d){ __X, __W };
79
}
80
 
81
/* Create a vector with the lower value W and upper value X.  */
82
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
83
_mm_setr_pd (double __W, double __X)
84
{
85
  return __extension__ (__m128d){ __W, __X };
86
}
87
 
88
/* Create a vector of zeros.  */
89
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
90
_mm_setzero_pd (void)
91
{
92
  return __extension__ (__m128d){ 0.0, 0.0 };
93
}
94
 
95
/* Sets the low DPFP value of A from the low value of B.  */
96
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
97
_mm_move_sd (__m128d __A, __m128d __B)
98
{
99
  return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
100
}
101
 
102
/* Load two DPFP values from P.  The address must be 16-byte aligned.  */
103
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
104
_mm_load_pd (double const *__P)
105
{
106
  return *(__m128d *)__P;
107
}
108
 
109
/* Load two DPFP values from P.  The address need not be 16-byte aligned.  */
110
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
111
_mm_loadu_pd (double const *__P)
112
{
113
  return __builtin_ia32_loadupd (__P);
114
}
115
 
116
/* Create a vector with all two elements equal to *P.  */
117
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
118
_mm_load1_pd (double const *__P)
119
{
120
  return _mm_set1_pd (*__P);
121
}
122
 
123
/* Create a vector with element 0 as *P and the rest zero.  */
124
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
125
_mm_load_sd (double const *__P)
126
{
127
  return _mm_set_sd (*__P);
128
}
129
 
130
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
131
_mm_load_pd1 (double const *__P)
132
{
133
  return _mm_load1_pd (__P);
134
}
135
 
136
/* Load two DPFP values in reverse order.  The address must be aligned.  */
137
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
138
_mm_loadr_pd (double const *__P)
139
{
140
  __m128d __tmp = _mm_load_pd (__P);
141
  return __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1));
142
}
143
 
144
/* Store two DPFP values.  The address must be 16-byte aligned.  */
145
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
146
_mm_store_pd (double *__P, __m128d __A)
147
{
148
  *(__m128d *)__P = __A;
149
}
150
 
151
/* Store two DPFP values.  The address need not be 16-byte aligned.  */
152
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
153
_mm_storeu_pd (double *__P, __m128d __A)
154
{
155
  __builtin_ia32_storeupd (__P, __A);
156
}
157
 
158
/* Stores the lower DPFP value.  */
159
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
160
_mm_store_sd (double *__P, __m128d __A)
161
{
162
  *__P = __builtin_ia32_vec_ext_v2df (__A, 0);
163
}
164
 
165
extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__))
166
_mm_cvtsd_f64 (__m128d __A)
167
{
168
  return __builtin_ia32_vec_ext_v2df (__A, 0);
169
}
170
 
171
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
172
_mm_storel_pd (double *__P, __m128d __A)
173
{
174
  _mm_store_sd (__P, __A);
175
}
176
 
177
/* Stores the upper DPFP value.  */
178
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
179
_mm_storeh_pd (double *__P, __m128d __A)
180
{
181
  *__P = __builtin_ia32_vec_ext_v2df (__A, 1);
182
}
183
 
184
/* Store the lower DPFP value across two words.
185
   The address must be 16-byte aligned.  */
186
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
187
_mm_store1_pd (double *__P, __m128d __A)
188
{
189
  _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0)));
190
}
191
 
192
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
193
_mm_store_pd1 (double *__P, __m128d __A)
194
{
195
  _mm_store1_pd (__P, __A);
196
}
197
 
198
/* Store two DPFP values in reverse order.  The address must be aligned.  */
199
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
200
_mm_storer_pd (double *__P, __m128d __A)
201
{
202
  _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,1)));
203
}
204
 
205
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
206
_mm_cvtsi128_si32 (__m128i __A)
207
{
208
  return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0);
209
}
210
 
211
#ifdef __x86_64__
212
/* Intel intrinsic.  */
213
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
214
_mm_cvtsi128_si64 (__m128i __A)
215
{
216
  return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0);
217
}
218
 
219
/* Microsoft intrinsic.  */
220
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
221
_mm_cvtsi128_si64x (__m128i __A)
222
{
223
  return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0);
224
}
225
#endif
226
 
227
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
228
_mm_add_pd (__m128d __A, __m128d __B)
229
{
230
  return (__m128d)__builtin_ia32_addpd ((__v2df)__A, (__v2df)__B);
231
}
232
 
233
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
234
_mm_add_sd (__m128d __A, __m128d __B)
235
{
236
  return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);
237
}
238
 
239
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
240
_mm_sub_pd (__m128d __A, __m128d __B)
241
{
242
  return (__m128d)__builtin_ia32_subpd ((__v2df)__A, (__v2df)__B);
243
}
244
 
245
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
246
_mm_sub_sd (__m128d __A, __m128d __B)
247
{
248
  return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);
249
}
250
 
251
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
252
_mm_mul_pd (__m128d __A, __m128d __B)
253
{
254
  return (__m128d)__builtin_ia32_mulpd ((__v2df)__A, (__v2df)__B);
255
}
256
 
257
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
258
_mm_mul_sd (__m128d __A, __m128d __B)
259
{
260
  return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B);
261
}
262
 
263
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
264
_mm_div_pd (__m128d __A, __m128d __B)
265
{
266
  return (__m128d)__builtin_ia32_divpd ((__v2df)__A, (__v2df)__B);
267
}
268
 
269
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
270
_mm_div_sd (__m128d __A, __m128d __B)
271
{
272
  return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B);
273
}
274
 
275
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
276
_mm_sqrt_pd (__m128d __A)
277
{
278
  return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A);
279
}
280
 
281
/* Return pair {sqrt (A[0), B[1]}.  */
282
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
283
_mm_sqrt_sd (__m128d __A, __m128d __B)
284
{
285
  __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
286
  return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp);
287
}
288
 
289
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
290
_mm_min_pd (__m128d __A, __m128d __B)
291
{
292
  return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B);
293
}
294
 
295
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
296
_mm_min_sd (__m128d __A, __m128d __B)
297
{
298
  return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B);
299
}
300
 
301
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
302
_mm_max_pd (__m128d __A, __m128d __B)
303
{
304
  return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B);
305
}
306
 
307
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
308
_mm_max_sd (__m128d __A, __m128d __B)
309
{
310
  return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B);
311
}
312
 
313
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
314
_mm_and_pd (__m128d __A, __m128d __B)
315
{
316
  return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B);
317
}
318
 
319
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
320
_mm_andnot_pd (__m128d __A, __m128d __B)
321
{
322
  return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B);
323
}
324
 
325
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
326
_mm_or_pd (__m128d __A, __m128d __B)
327
{
328
  return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B);
329
}
330
 
331
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
332
_mm_xor_pd (__m128d __A, __m128d __B)
333
{
334
  return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B);
335
}
336
 
337
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
338
_mm_cmpeq_pd (__m128d __A, __m128d __B)
339
{
340
  return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B);
341
}
342
 
343
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
344
_mm_cmplt_pd (__m128d __A, __m128d __B)
345
{
346
  return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B);
347
}
348
 
349
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
350
_mm_cmple_pd (__m128d __A, __m128d __B)
351
{
352
  return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B);
353
}
354
 
355
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
356
_mm_cmpgt_pd (__m128d __A, __m128d __B)
357
{
358
  return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B);
359
}
360
 
361
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
362
_mm_cmpge_pd (__m128d __A, __m128d __B)
363
{
364
  return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B);
365
}
366
 
367
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
368
_mm_cmpneq_pd (__m128d __A, __m128d __B)
369
{
370
  return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B);
371
}
372
 
373
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
374
_mm_cmpnlt_pd (__m128d __A, __m128d __B)
375
{
376
  return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B);
377
}
378
 
379
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
380
_mm_cmpnle_pd (__m128d __A, __m128d __B)
381
{
382
  return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B);
383
}
384
 
385
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
386
_mm_cmpngt_pd (__m128d __A, __m128d __B)
387
{
388
  return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B);
389
}
390
 
391
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
392
_mm_cmpnge_pd (__m128d __A, __m128d __B)
393
{
394
  return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B);
395
}
396
 
397
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
398
_mm_cmpord_pd (__m128d __A, __m128d __B)
399
{
400
  return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B);
401
}
402
 
403
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
404
_mm_cmpunord_pd (__m128d __A, __m128d __B)
405
{
406
  return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B);
407
}
408
 
409
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
410
_mm_cmpeq_sd (__m128d __A, __m128d __B)
411
{
412
  return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B);
413
}
414
 
415
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
416
_mm_cmplt_sd (__m128d __A, __m128d __B)
417
{
418
  return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B);
419
}
420
 
421
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
422
_mm_cmple_sd (__m128d __A, __m128d __B)
423
{
424
  return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B);
425
}
426
 
427
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
428
_mm_cmpgt_sd (__m128d __A, __m128d __B)
429
{
430
  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
431
                                         (__v2df)
432
                                         __builtin_ia32_cmpltsd ((__v2df) __B,
433
                                                                 (__v2df)
434
                                                                 __A));
435
}
436
 
437
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
438
_mm_cmpge_sd (__m128d __A, __m128d __B)
439
{
440
  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
441
                                         (__v2df)
442
                                         __builtin_ia32_cmplesd ((__v2df) __B,
443
                                                                 (__v2df)
444
                                                                 __A));
445
}
446
 
447
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
448
_mm_cmpneq_sd (__m128d __A, __m128d __B)
449
{
450
  return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B);
451
}
452
 
453
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
454
_mm_cmpnlt_sd (__m128d __A, __m128d __B)
455
{
456
  return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B);
457
}
458
 
459
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
460
_mm_cmpnle_sd (__m128d __A, __m128d __B)
461
{
462
  return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B);
463
}
464
 
465
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
466
_mm_cmpngt_sd (__m128d __A, __m128d __B)
467
{
468
  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
469
                                         (__v2df)
470
                                         __builtin_ia32_cmpnltsd ((__v2df) __B,
471
                                                                  (__v2df)
472
                                                                  __A));
473
}
474
 
475
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
476
_mm_cmpnge_sd (__m128d __A, __m128d __B)
477
{
478
  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
479
                                         (__v2df)
480
                                         __builtin_ia32_cmpnlesd ((__v2df) __B,
481
                                                                  (__v2df)
482
                                                                  __A));
483
}
484
 
485
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
486
_mm_cmpord_sd (__m128d __A, __m128d __B)
487
{
488
  return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B);
489
}
490
 
491
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
492
_mm_cmpunord_sd (__m128d __A, __m128d __B)
493
{
494
  return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B);
495
}
496
 
497
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
498
_mm_comieq_sd (__m128d __A, __m128d __B)
499
{
500
  return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B);
501
}
502
 
503
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
504
_mm_comilt_sd (__m128d __A, __m128d __B)
505
{
506
  return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B);
507
}
508
 
509
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
510
_mm_comile_sd (__m128d __A, __m128d __B)
511
{
512
  return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B);
513
}
514
 
515
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
516
_mm_comigt_sd (__m128d __A, __m128d __B)
517
{
518
  return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B);
519
}
520
 
521
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
522
_mm_comige_sd (__m128d __A, __m128d __B)
523
{
524
  return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B);
525
}
526
 
527
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
528
_mm_comineq_sd (__m128d __A, __m128d __B)
529
{
530
  return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B);
531
}
532
 
533
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
534
_mm_ucomieq_sd (__m128d __A, __m128d __B)
535
{
536
  return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B);
537
}
538
 
539
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
540
_mm_ucomilt_sd (__m128d __A, __m128d __B)
541
{
542
  return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B);
543
}
544
 
545
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
546
_mm_ucomile_sd (__m128d __A, __m128d __B)
547
{
548
  return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B);
549
}
550
 
551
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
552
_mm_ucomigt_sd (__m128d __A, __m128d __B)
553
{
554
  return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B);
555
}
556
 
557
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
558
_mm_ucomige_sd (__m128d __A, __m128d __B)
559
{
560
  return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B);
561
}
562
 
563
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
564
_mm_ucomineq_sd (__m128d __A, __m128d __B)
565
{
566
  return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B);
567
}
568
 
569
/* Create a vector of Qi, where i is the element number.  */
570
 
571
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
572
_mm_set_epi64x (long long __q1, long long __q0)
573
{
574
  return __extension__ (__m128i)(__v2di){ __q0, __q1 };
575
}
576
 
577
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
578
_mm_set_epi64 (__m64 __q1,  __m64 __q0)
579
{
580
  return _mm_set_epi64x ((long long)__q1, (long long)__q0);
581
}
582
 
583
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
584
_mm_set_epi32 (int __q3, int __q2, int __q1, int __q0)
585
{
586
  return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 };
587
}
588
 
589
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
590
_mm_set_epi16 (short __q7, short __q6, short __q5, short __q4,
591
               short __q3, short __q2, short __q1, short __q0)
592
{
593
  return __extension__ (__m128i)(__v8hi){
594
    __q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 };
595
}
596
 
597
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
598
_mm_set_epi8 (char __q15, char __q14, char __q13, char __q12,
599
              char __q11, char __q10, char __q09, char __q08,
600
              char __q07, char __q06, char __q05, char __q04,
601
              char __q03, char __q02, char __q01, char __q00)
602
{
603
  return __extension__ (__m128i)(__v16qi){
604
    __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
605
    __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15
606
  };
607
}
608
 
609
/* Set all of the elements of the vector to A.  */
610
 
611
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
612
_mm_set1_epi64x (long long __A)
613
{
614
  return _mm_set_epi64x (__A, __A);
615
}
616
 
617
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
618
_mm_set1_epi64 (__m64 __A)
619
{
620
  return _mm_set_epi64 (__A, __A);
621
}
622
 
623
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
624
_mm_set1_epi32 (int __A)
625
{
626
  return _mm_set_epi32 (__A, __A, __A, __A);
627
}
628
 
629
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
630
_mm_set1_epi16 (short __A)
631
{
632
  return _mm_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A);
633
}
634
 
635
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
636
_mm_set1_epi8 (char __A)
637
{
638
  return _mm_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
639
                       __A, __A, __A, __A, __A, __A, __A, __A);
640
}
641
 
642
/* Create a vector of Qi, where i is the element number.
643
   The parameter order is reversed from the _mm_set_epi* functions.  */
644
 
645
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
646
_mm_setr_epi64 (__m64 __q0, __m64 __q1)
647
{
648
  return _mm_set_epi64 (__q1, __q0);
649
}
650
 
651
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
652
_mm_setr_epi32 (int __q0, int __q1, int __q2, int __q3)
653
{
654
  return _mm_set_epi32 (__q3, __q2, __q1, __q0);
655
}
656
 
657
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
658
_mm_setr_epi16 (short __q0, short __q1, short __q2, short __q3,
659
                short __q4, short __q5, short __q6, short __q7)
660
{
661
  return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0);
662
}
663
 
664
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
665
_mm_setr_epi8 (char __q00, char __q01, char __q02, char __q03,
666
               char __q04, char __q05, char __q06, char __q07,
667
               char __q08, char __q09, char __q10, char __q11,
668
               char __q12, char __q13, char __q14, char __q15)
669
{
670
  return _mm_set_epi8 (__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08,
671
                       __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00);
672
}
673
 
674
/* Load and store integer vectors through a __m128i pointer.  */
675
 
676
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
677
_mm_load_si128 (__m128i const *__P)
678
{
679
  return *__P;
680
}
681
 
682
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
683
_mm_loadu_si128 (__m128i const *__P)
684
{
685
  return (__m128i) __builtin_ia32_loaddqu ((char const *)__P);
686
}
687
 
688
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
689
_mm_loadl_epi64 (__m128i const *__P)
690
{
691
  return _mm_set_epi64 ((__m64)0LL, *(__m64 *)__P);
692
}
693
 
694
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
695
_mm_store_si128 (__m128i *__P, __m128i __B)
696
{
697
  *__P = __B;
698
}
699
 
700
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
701
_mm_storeu_si128 (__m128i *__P, __m128i __B)
702
{
703
  __builtin_ia32_storedqu ((char *)__P, (__v16qi)__B);
704
}
705
 
706
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
707
_mm_storel_epi64 (__m128i *__P, __m128i __B)
708
{
709
  *(long long *)__P = __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0);
710
}
711
 
712
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
713
_mm_movepi64_pi64 (__m128i __B)
714
{
715
  return (__m64) __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0);
716
}
717
 
718
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
719
_mm_movpi64_epi64 (__m64 __A)
720
{
721
  return _mm_set_epi64 ((__m64)0LL, __A);
722
}
723
 
724
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
725
_mm_move_epi64 (__m128i __A)
726
{
727
  return (__m128i)__builtin_ia32_movq128 ((__v2di) __A);
728
}
729
 
730
/* Create a vector of zeros.  */
731
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
732
_mm_setzero_si128 (void)
733
{
734
  return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 };
735
}
736
 
737
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
738
_mm_cvtepi32_pd (__m128i __A)
739
{
740
  return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A);
741
}
742
 
743
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
744
_mm_cvtepi32_ps (__m128i __A)
745
{
746
  return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A);
747
}
748
 
749
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
750
_mm_cvtpd_epi32 (__m128d __A)
751
{
752
  return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A);
753
}
754
 
755
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
756
_mm_cvtpd_pi32 (__m128d __A)
757
{
758
  return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A);
759
}
760
 
761
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
762
_mm_cvtpd_ps (__m128d __A)
763
{
764
  return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A);
765
}
766
 
767
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
768
_mm_cvttpd_epi32 (__m128d __A)
769
{
770
  return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A);
771
}
772
 
773
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
774
_mm_cvttpd_pi32 (__m128d __A)
775
{
776
  return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A);
777
}
778
 
779
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
780
_mm_cvtpi32_pd (__m64 __A)
781
{
782
  return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A);
783
}
784
 
785
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
786
_mm_cvtps_epi32 (__m128 __A)
787
{
788
  return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A);
789
}
790
 
791
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
792
_mm_cvttps_epi32 (__m128 __A)
793
{
794
  return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A);
795
}
796
 
797
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
798
_mm_cvtps_pd (__m128 __A)
799
{
800
  return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A);
801
}
802
 
803
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
804
_mm_cvtsd_si32 (__m128d __A)
805
{
806
  return __builtin_ia32_cvtsd2si ((__v2df) __A);
807
}
808
 
809
#ifdef __x86_64__
810
/* Intel intrinsic.  */
811
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
812
_mm_cvtsd_si64 (__m128d __A)
813
{
814
  return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
815
}
816
 
817
/* Microsoft intrinsic.  */
818
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
819
_mm_cvtsd_si64x (__m128d __A)
820
{
821
  return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
822
}
823
#endif
824
 
825
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
826
_mm_cvttsd_si32 (__m128d __A)
827
{
828
  return __builtin_ia32_cvttsd2si ((__v2df) __A);
829
}
830
 
831
#ifdef __x86_64__
832
/* Intel intrinsic.  */
833
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
834
_mm_cvttsd_si64 (__m128d __A)
835
{
836
  return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
837
}
838
 
839
/* Microsoft intrinsic.  */
840
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
841
_mm_cvttsd_si64x (__m128d __A)
842
{
843
  return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
844
}
845
#endif
846
 
847
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
848
_mm_cvtsd_ss (__m128 __A, __m128d __B)
849
{
850
  return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B);
851
}
852
 
853
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
854
_mm_cvtsi32_sd (__m128d __A, int __B)
855
{
856
  return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
857
}
858
 
859
#ifdef __x86_64__
860
/* Intel intrinsic.  */
861
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
862
_mm_cvtsi64_sd (__m128d __A, long long __B)
863
{
864
  return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
865
}
866
 
867
/* Microsoft intrinsic.  */
868
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
869
_mm_cvtsi64x_sd (__m128d __A, long long __B)
870
{
871
  return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
872
}
873
#endif
874
 
875
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
876
_mm_cvtss_sd (__m128d __A, __m128 __B)
877
{
878
  return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B);
879
}
880
 
881
/* Select double-precision elements from __A and __B under control of an
   integer mask (shufpd).  An inline function is provided when optimizing
   and a macro otherwise -- presumably because the builtin needs the mask as
   a compile-time constant, which the function form only delivers once it
   has been inlined.  */
#ifdef __OPTIMIZE__
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask)
{
  __v2df __first = (__v2df) __A;
  __v2df __second = (__v2df) __B;
  return (__m128d) __builtin_ia32_shufpd (__first, __second, __mask);
}
#else
#define _mm_shuffle_pd(A, B, N) \
  ((__m128d)__builtin_ia32_shufpd ((__v2df)(__m128d)(A), \
                                   (__v2df)(__m128d)(B), \
                                   (int)(N)))
#endif
892
 
893
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
894
_mm_unpackhi_pd (__m128d __A, __m128d __B)
895
{
896
  return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
897
}
898
 
899
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
900
_mm_unpacklo_pd (__m128d __A, __m128d __B)
901
{
902
  return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B);
903
}
904
 
905
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
906
_mm_loadh_pd (__m128d __A, double const *__B)
907
{
908
  return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
909
}
910
 
911
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
912
_mm_loadl_pd (__m128d __A, double const *__B)
913
{
914
  return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
915
}
916
 
917
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
918
_mm_movemask_pd (__m128d __A)
919
{
920
  return __builtin_ia32_movmskpd ((__v2df)__A);
921
}
922
 
923
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
924
_mm_packs_epi16 (__m128i __A, __m128i __B)
925
{
926
  return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B);
927
}
928
 
929
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
930
_mm_packs_epi32 (__m128i __A, __m128i __B)
931
{
932
  return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B);
933
}
934
 
935
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
936
_mm_packus_epi16 (__m128i __A, __m128i __B)
937
{
938
  return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B);
939
}
940
 
941
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
942
_mm_unpackhi_epi8 (__m128i __A, __m128i __B)
943
{
944
  return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B);
945
}
946
 
947
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
948
_mm_unpackhi_epi16 (__m128i __A, __m128i __B)
949
{
950
  return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B);
951
}
952
 
953
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
954
_mm_unpackhi_epi32 (__m128i __A, __m128i __B)
955
{
956
  return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B);
957
}
958
 
959
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
960
_mm_unpackhi_epi64 (__m128i __A, __m128i __B)
961
{
962
  return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B);
963
}
964
 
965
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
966
_mm_unpacklo_epi8 (__m128i __A, __m128i __B)
967
{
968
  return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B);
969
}
970
 
971
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
972
_mm_unpacklo_epi16 (__m128i __A, __m128i __B)
973
{
974
  return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B);
975
}
976
 
977
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
978
_mm_unpacklo_epi32 (__m128i __A, __m128i __B)
979
{
980
  return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B);
981
}
982
 
983
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
984
_mm_unpacklo_epi64 (__m128i __A, __m128i __B)
985
{
986
  return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B);
987
}
988
 
989
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
990
_mm_add_epi8 (__m128i __A, __m128i __B)
991
{
992
  return (__m128i)__builtin_ia32_paddb128 ((__v16qi)__A, (__v16qi)__B);
993
}
994
 
995
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
996
_mm_add_epi16 (__m128i __A, __m128i __B)
997
{
998
  return (__m128i)__builtin_ia32_paddw128 ((__v8hi)__A, (__v8hi)__B);
999
}
1000
 
1001
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1002
_mm_add_epi32 (__m128i __A, __m128i __B)
1003
{
1004
  return (__m128i)__builtin_ia32_paddd128 ((__v4si)__A, (__v4si)__B);
1005
}
1006
 
1007
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1008
_mm_add_epi64 (__m128i __A, __m128i __B)
1009
{
1010
  return (__m128i)__builtin_ia32_paddq128 ((__v2di)__A, (__v2di)__B);
1011
}
1012
 
1013
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1014
_mm_adds_epi8 (__m128i __A, __m128i __B)
1015
{
1016
  return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B);
1017
}
1018
 
1019
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1020
_mm_adds_epi16 (__m128i __A, __m128i __B)
1021
{
1022
  return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B);
1023
}
1024
 
1025
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1026
_mm_adds_epu8 (__m128i __A, __m128i __B)
1027
{
1028
  return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B);
1029
}
1030
 
1031
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1032
_mm_adds_epu16 (__m128i __A, __m128i __B)
1033
{
1034
  return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B);
1035
}
1036
 
1037
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1038
_mm_sub_epi8 (__m128i __A, __m128i __B)
1039
{
1040
  return (__m128i)__builtin_ia32_psubb128 ((__v16qi)__A, (__v16qi)__B);
1041
}
1042
 
1043
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1044
_mm_sub_epi16 (__m128i __A, __m128i __B)
1045
{
1046
  return (__m128i)__builtin_ia32_psubw128 ((__v8hi)__A, (__v8hi)__B);
1047
}
1048
 
1049
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1050
_mm_sub_epi32 (__m128i __A, __m128i __B)
1051
{
1052
  return (__m128i)__builtin_ia32_psubd128 ((__v4si)__A, (__v4si)__B);
1053
}
1054
 
1055
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1056
_mm_sub_epi64 (__m128i __A, __m128i __B)
1057
{
1058
  return (__m128i)__builtin_ia32_psubq128 ((__v2di)__A, (__v2di)__B);
1059
}
1060
 
1061
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1062
_mm_subs_epi8 (__m128i __A, __m128i __B)
1063
{
1064
  return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B);
1065
}
1066
 
1067
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1068
_mm_subs_epi16 (__m128i __A, __m128i __B)
1069
{
1070
  return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B);
1071
}
1072
 
1073
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1074
_mm_subs_epu8 (__m128i __A, __m128i __B)
1075
{
1076
  return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B);
1077
}
1078
 
1079
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1080
_mm_subs_epu16 (__m128i __A, __m128i __B)
1081
{
1082
  return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B);
1083
}
1084
 
1085
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1086
_mm_madd_epi16 (__m128i __A, __m128i __B)
1087
{
1088
  return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B);
1089
}
1090
 
1091
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1092
_mm_mulhi_epi16 (__m128i __A, __m128i __B)
1093
{
1094
  return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B);
1095
}
1096
 
1097
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1098
_mm_mullo_epi16 (__m128i __A, __m128i __B)
1099
{
1100
  return (__m128i)__builtin_ia32_pmullw128 ((__v8hi)__A, (__v8hi)__B);
1101
}
1102
 
1103
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1104
_mm_mul_su32 (__m64 __A, __m64 __B)
1105
{
1106
  return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
1107
}
1108
 
1109
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1110
_mm_mul_epu32 (__m128i __A, __m128i __B)
1111
{
1112
  return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B);
1113
}
1114
 
1115
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1116
_mm_slli_epi16 (__m128i __A, int __B)
1117
{
1118
  return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
1119
}
1120
 
1121
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1122
_mm_slli_epi32 (__m128i __A, int __B)
1123
{
1124
  return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
1125
}
1126
 
1127
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1128
_mm_slli_epi64 (__m128i __A, int __B)
1129
{
1130
  return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
1131
}
1132
 
1133
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1134
_mm_srai_epi16 (__m128i __A, int __B)
1135
{
1136
  return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
1137
}
1138
 
1139
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1140
_mm_srai_epi32 (__m128i __A, int __B)
1141
{
1142
  return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
1143
}
1144
 
1145
/* Whole-register shifts of __A (psrldq / pslldq).  The count passed to the
   builtins is N * 8 in every variant below, i.e. the intrinsic's argument
   is scaled by eight before reaching the builtin.  Inline functions are
   provided when optimizing and macros otherwise -- presumably because the
   builtins need the count as a compile-time constant.  */
#ifdef __OPTIMIZE__
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_si128 (__m128i __A, const int __N)
{
  __m128i __shifted = (__m128i) __builtin_ia32_psrldqi128 (__A, __N * 8);
  return __shifted;
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_si128 (__m128i __A, const int __N)
{
  __m128i __shifted = (__m128i) __builtin_ia32_pslldqi128 (__A, __N * 8);
  return __shifted;
}
#else
#define _mm_srli_si128(A, N)                                    \
  ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A),            \
                                       (int)(N) * 8))
#define _mm_slli_si128(A, N)                                    \
  ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A),            \
                                       (int)(N) * 8))
#endif
1163
 
1164
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1165
_mm_srli_epi16 (__m128i __A, int __B)
1166
{
1167
  return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
1168
}
1169
 
1170
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1171
_mm_srli_epi32 (__m128i __A, int __B)
1172
{
1173
  return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
1174
}
1175
 
1176
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1177
_mm_srli_epi64 (__m128i __A, int __B)
1178
{
1179
  return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
1180
}
1181
 
1182
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1183
_mm_sll_epi16 (__m128i __A, __m128i __B)
1184
{
1185
  return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B);
1186
}
1187
 
1188
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1189
_mm_sll_epi32 (__m128i __A, __m128i __B)
1190
{
1191
  return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B);
1192
}
1193
 
1194
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1195
_mm_sll_epi64 (__m128i __A, __m128i __B)
1196
{
1197
  return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B);
1198
}
1199
 
1200
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1201
_mm_sra_epi16 (__m128i __A, __m128i __B)
1202
{
1203
  return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B);
1204
}
1205
 
1206
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1207
_mm_sra_epi32 (__m128i __A, __m128i __B)
1208
{
1209
  return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B);
1210
}
1211
 
1212
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1213
_mm_srl_epi16 (__m128i __A, __m128i __B)
1214
{
1215
  return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B);
1216
}
1217
 
1218
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1219
_mm_srl_epi32 (__m128i __A, __m128i __B)
1220
{
1221
  return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B);
1222
}
1223
 
1224
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1225
_mm_srl_epi64 (__m128i __A, __m128i __B)
1226
{
1227
  return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
1228
}
1229
 
1230
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1231
_mm_and_si128 (__m128i __A, __m128i __B)
1232
{
1233
  return (__m128i)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B);
1234
}
1235
 
1236
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1237
_mm_andnot_si128 (__m128i __A, __m128i __B)
1238
{
1239
  return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
1240
}
1241
 
1242
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1243
_mm_or_si128 (__m128i __A, __m128i __B)
1244
{
1245
  return (__m128i)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B);
1246
}
1247
 
1248
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1249
_mm_xor_si128 (__m128i __A, __m128i __B)
1250
{
1251
  return (__m128i)__builtin_ia32_pxor128 ((__v2di)__A, (__v2di)__B);
1252
}
1253
 
1254
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1255
_mm_cmpeq_epi8 (__m128i __A, __m128i __B)
1256
{
1257
  return (__m128i)__builtin_ia32_pcmpeqb128 ((__v16qi)__A, (__v16qi)__B);
1258
}
1259
 
1260
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1261
_mm_cmpeq_epi16 (__m128i __A, __m128i __B)
1262
{
1263
  return (__m128i)__builtin_ia32_pcmpeqw128 ((__v8hi)__A, (__v8hi)__B);
1264
}
1265
 
1266
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1267
_mm_cmpeq_epi32 (__m128i __A, __m128i __B)
1268
{
1269
  return (__m128i)__builtin_ia32_pcmpeqd128 ((__v4si)__A, (__v4si)__B);
1270
}
1271
 
1272
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1273
_mm_cmplt_epi8 (__m128i __A, __m128i __B)
1274
{
1275
  return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__B, (__v16qi)__A);
1276
}
1277
 
1278
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1279
_mm_cmplt_epi16 (__m128i __A, __m128i __B)
1280
{
1281
  return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__B, (__v8hi)__A);
1282
}
1283
 
1284
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1285
_mm_cmplt_epi32 (__m128i __A, __m128i __B)
1286
{
1287
  return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__B, (__v4si)__A);
1288
}
1289
 
1290
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1291
_mm_cmpgt_epi8 (__m128i __A, __m128i __B)
1292
{
1293
  return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__A, (__v16qi)__B);
1294
}
1295
 
1296
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1297
_mm_cmpgt_epi16 (__m128i __A, __m128i __B)
1298
{
1299
  return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__A, (__v8hi)__B);
1300
}
1301
 
1302
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1303
_mm_cmpgt_epi32 (__m128i __A, __m128i __B)
1304
{
1305
  return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__A, (__v4si)__B);
1306
}
1307
 
1308
/* _mm_extract_epi16 returns the 16-bit element of A selected by N;
   _mm_insert_epi16 returns A with the 16-bit element selected by N
   replaced by D.

   The extracted word is cast through unsigned short so that it is
   zero-extended into the int result, as Intel specifies for this
   intrinsic (PEXTRW); without the cast a word with its top bit set is
   sign-extended to a negative int.

   Inline functions are provided when optimizing and macros otherwise --
   presumably because the builtins need the selector as a compile-time
   constant.  */
#ifdef __OPTIMIZE__
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi16 (__m128i const __A, int const __N)
{
  return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi16 (__m128i const __A, int const __D, int const __N)
{
  return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)__A, __D, __N);
}
#else
#define _mm_extract_epi16(A, N) \
  ((int) (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N)))
#define _mm_insert_epi16(A, D, N)                               \
  ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A), \
                                          (int)(D), (int)(N)))
#endif
1327
 
1328
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1329
_mm_max_epi16 (__m128i __A, __m128i __B)
1330
{
1331
  return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B);
1332
}
1333
 
1334
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1335
_mm_max_epu8 (__m128i __A, __m128i __B)
1336
{
1337
  return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B);
1338
}
1339
 
1340
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1341
_mm_min_epi16 (__m128i __A, __m128i __B)
1342
{
1343
  return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B);
1344
}
1345
 
1346
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1347
_mm_min_epu8 (__m128i __A, __m128i __B)
1348
{
1349
  return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B);
1350
}
1351
 
1352
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1353
_mm_movemask_epi8 (__m128i __A)
1354
{
1355
  return __builtin_ia32_pmovmskb128 ((__v16qi)__A);
1356
}
1357
 
1358
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1359
_mm_mulhi_epu16 (__m128i __A, __m128i __B)
1360
{
1361
  return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B);
1362
}
1363
 
1364
/* Integer shuffles controlled by an integer mask: _mm_shufflehi_epi16
   (pshufhw), _mm_shufflelo_epi16 (pshuflw) and _mm_shuffle_epi32 (pshufd).
   Inline functions are provided when optimizing and macros otherwise --
   presumably because the builtins need the mask as a compile-time
   constant.  */
#ifdef __OPTIMIZE__
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shufflehi_epi16 (__m128i __A, const int __mask)
{
  __v8hi __src = (__v8hi) __A;
  return (__m128i) __builtin_ia32_pshufhw (__src, __mask);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shufflelo_epi16 (__m128i __A, const int __mask)
{
  __v8hi __src = (__v8hi) __A;
  return (__m128i) __builtin_ia32_pshuflw (__src, __mask);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_epi32 (__m128i __A, const int __mask)
{
  __v4si __src = (__v4si) __A;
  return (__m128i) __builtin_ia32_pshufd (__src, __mask);
}
#else
#define _mm_shufflehi_epi16(A, N)                               \
  ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__m128i)(A),       \
                                    (int)(N)))
#define _mm_shufflelo_epi16(A, N)                               \
  ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__m128i)(A),       \
                                    (int)(N)))
#define _mm_shuffle_epi32(A, N)                                 \
  ((__m128i)__builtin_ia32_pshufd ((__v4si)(__m128i)(A),        \
                                   (int)(N)))
#endif
1390
 
1391
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1392
_mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C)
1393
{
1394
  __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C);
1395
}
1396
 
1397
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1398
_mm_avg_epu8 (__m128i __A, __m128i __B)
1399
{
1400
  return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B);
1401
}
1402
 
1403
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1404
_mm_avg_epu16 (__m128i __A, __m128i __B)
1405
{
1406
  return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B);
1407
}
1408
 
1409
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1410
_mm_sad_epu8 (__m128i __A, __m128i __B)
1411
{
1412
  return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B);
1413
}
1414
 
1415
/* Store the integer __B to *__A through the movnti builtin (a
   non-temporal store).  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_si32 (int *__A, int __B)
{
  int *__dst = __A;
  int __val = __B;
  __builtin_ia32_movnti (__dst, __val);
}
1420
 
1421
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1422
_mm_stream_si128 (__m128i *__A, __m128i __B)
1423
{
1424
  __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B);
1425
}
1426
 
1427
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1428
_mm_stream_pd (double *__A, __m128d __B)
1429
{
1430
  __builtin_ia32_movntpd (__A, (__v2df)__B);
1431
}
1432
 
1433
/* Flush the cache line containing the address __A (clflush).  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_clflush (void const *__A)
{
  void const *__line = __A;
  __builtin_ia32_clflush (__line);
}
1438
 
1439
/* Issue a load fence (lfence).  Takes no arguments and returns nothing;
   the whole body is the builtin call.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_lfence (void)
{
  __builtin_ia32_lfence ();
  return;
}
1444
 
1445
/* Issue a full memory fence (mfence).  Takes no arguments and returns
   nothing; the whole body is the builtin call.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mfence (void)
{
  __builtin_ia32_mfence ();
  return;
}
1450
 
1451
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1452
_mm_cvtsi32_si128 (int __A)
1453
{
1454
  return _mm_set_epi32 (0, 0, 0, __A);
1455
}
1456
 
1457
#ifdef __x86_64__
1458
/* Intel intrinsic.  */
1459
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1460
_mm_cvtsi64_si128 (long long __A)
1461
{
1462
  return _mm_set_epi64x (0, __A);
1463
}
1464
 
1465
/* Microsoft intrinsic.  */
1466
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1467
_mm_cvtsi64x_si128 (long long __A)
1468
{
1469
  return _mm_set_epi64x (0, __A);
1470
}
1471
#endif
1472
 
1473
/* Casts between various SP, DP, INT vector types.  Note that these perform
   no conversion of values; they just change the type.  */
1475
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1476
_mm_castpd_ps(__m128d __A)
1477
{
1478
  return (__m128) __A;
1479
}
1480
 
1481
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1482
_mm_castpd_si128(__m128d __A)
1483
{
1484
  return (__m128i) __A;
1485
}
1486
 
1487
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1488
_mm_castps_pd(__m128 __A)
1489
{
1490
  return (__m128d) __A;
1491
}
1492
 
1493
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1494
_mm_castps_si128(__m128 __A)
1495
{
1496
  return (__m128i) __A;
1497
}
1498
 
1499
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1500
_mm_castsi128_ps(__m128i __A)
1501
{
1502
  return (__m128) __A;
1503
}
1504
 
1505
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1506
_mm_castsi128_pd(__m128i __A)
1507
{
1508
  return (__m128d) __A;
1509
}
1510
 
1511
#endif /* __SSE2__  */
1512
 
1513
#endif /* _EMMINTRIN_H_INCLUDED */

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.