/* Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING.  If not, write to
   the Free Software Foundation, 51 Franklin Street, Fifth Floor,
   Boston, MA 02110-1301, USA.  */

/* As a special exception, if you include this header file into source
   files compiled by GCC, this header file does not by itself cause
   the resulting executable to be covered by the GNU General Public
   License.  This exception does not however invalidate any other
   reasons why the executable file might be covered by the GNU General
   Public License.  */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 9.0.  */

#ifndef _EMMINTRIN_H_INCLUDED
#define _EMMINTRIN_H_INCLUDED

#ifdef __SSE2__
#include <xmmintrin.h>

/* SSE2 */
typedef double __v2df __attribute__ ((__vector_size__ (16)));
typedef long long __v2di __attribute__ ((__vector_size__ (16)));
typedef int __v4si __attribute__ ((__vector_size__ (16)));
typedef short __v8hi __attribute__ ((__vector_size__ (16)));
typedef char __v16qi __attribute__ ((__vector_size__ (16)));

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));

/* Create a selector for use with the SHUFPD instruction.  */
#define _MM_SHUFFLE2(fp1,fp0) \
 (((fp1) << 1) | (fp0))
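
/* Usage sketch (editorial illustration, not part of the upstream header):
   with _mm_shuffle_pd, defined below, bit 0 of the selector picks the
   element taken from the first operand and bit 1 the element taken from
   the second:

     __m128d a = _mm_set_pd (2.0, 1.0);
     __m128d b = _mm_set_pd (4.0, 3.0);
     __m128d r = _mm_shuffle_pd (a, b, _MM_SHUFFLE2 (1, 0));   r = { 1.0, 4.0 }
*/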

/* Create a vector with element 0 as F and the rest zero.  */
static __inline __m128d __attribute__((__always_inline__))
_mm_set_sd (double __F)
{
  return __extension__ (__m128d){ __F, 0 };
}

/* Create a vector with both elements equal to F.  */
static __inline __m128d __attribute__((__always_inline__))
_mm_set1_pd (double __F)
{
  return __extension__ (__m128d){ __F, __F };
}

static __inline __m128d __attribute__((__always_inline__))
_mm_set_pd1 (double __F)
{
  return _mm_set1_pd (__F);
}

/* Create a vector with the lower value X and upper value W.  */
static __inline __m128d __attribute__((__always_inline__))
_mm_set_pd (double __W, double __X)
{
  return __extension__ (__m128d){ __X, __W };
}

/* Create a vector with the lower value W and upper value X.  */
static __inline __m128d __attribute__((__always_inline__))
_mm_setr_pd (double __W, double __X)
{
  return __extension__ (__m128d){ __W, __X };
}

/* Create a vector of zeros.  */
static __inline __m128d __attribute__((__always_inline__))
_mm_setzero_pd (void)
{
  return __extension__ (__m128d){ 0.0, 0.0 };
}
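
/* Usage sketch (editorial illustration, not part of the upstream header):
   _mm_set_pd lists arguments high element first, _mm_setr_pd low element
   first, so these two calls build the same vector:

     __m128d v1 = _mm_set_pd (2.0, 1.0);    element 0 = 1.0, element 1 = 2.0
     __m128d v2 = _mm_setr_pd (1.0, 2.0);   same layout as v1
*/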

/* Sets the low DPFP value of A from the low value of B.  */
static __inline __m128d __attribute__((__always_inline__))
_mm_move_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
}

/* Load two DPFP values from P.  The address must be 16-byte aligned.  */
static __inline __m128d __attribute__((__always_inline__))
_mm_load_pd (double const *__P)
{
  return *(__m128d *)__P;
}

/* Load two DPFP values from P.  The address need not be 16-byte aligned.  */
static __inline __m128d __attribute__((__always_inline__))
_mm_loadu_pd (double const *__P)
{
  return __builtin_ia32_loadupd (__P);
}

/* Create a vector with both elements equal to *P.  */
static __inline __m128d __attribute__((__always_inline__))
_mm_load1_pd (double const *__P)
{
  return _mm_set1_pd (*__P);
}

/* Create a vector with element 0 as *P and the rest zero.  */
static __inline __m128d __attribute__((__always_inline__))
_mm_load_sd (double const *__P)
{
  return _mm_set_sd (*__P);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_load_pd1 (double const *__P)
{
  return _mm_load1_pd (__P);
}

/* Load two DPFP values in reverse order.  The address must be aligned.  */
static __inline __m128d __attribute__((__always_inline__))
_mm_loadr_pd (double const *__P)
{
  __m128d __tmp = _mm_load_pd (__P);
  return __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1));
}

/* Store two DPFP values.  The address must be 16-byte aligned.  */
static __inline void __attribute__((__always_inline__))
_mm_store_pd (double *__P, __m128d __A)
{
  *(__m128d *)__P = __A;
}

/* Store two DPFP values.  The address need not be 16-byte aligned.  */
static __inline void __attribute__((__always_inline__))
_mm_storeu_pd (double *__P, __m128d __A)
{
  __builtin_ia32_storeupd (__P, __A);
}

/* Stores the lower DPFP value.  */
static __inline void __attribute__((__always_inline__))
_mm_store_sd (double *__P, __m128d __A)
{
  *__P = __builtin_ia32_vec_ext_v2df (__A, 0);
}

static __inline double __attribute__((__always_inline__))
_mm_cvtsd_f64 (__m128d __A)
{
  return __builtin_ia32_vec_ext_v2df (__A, 0);
}

static __inline void __attribute__((__always_inline__))
_mm_storel_pd (double *__P, __m128d __A)
{
  _mm_store_sd (__P, __A);
}

/* Stores the upper DPFP value.  */
static __inline void __attribute__((__always_inline__))
_mm_storeh_pd (double *__P, __m128d __A)
{
  *__P = __builtin_ia32_vec_ext_v2df (__A, 1);
}

/* Store the lower DPFP value across two words.
   The address must be 16-byte aligned.  */
static __inline void __attribute__((__always_inline__))
_mm_store1_pd (double *__P, __m128d __A)
{
  _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0)));
}

static __inline void __attribute__((__always_inline__))
_mm_store_pd1 (double *__P, __m128d __A)
{
  _mm_store1_pd (__P, __A);
}

/* Store two DPFP values in reverse order.  The address must be aligned.  */
static __inline void __attribute__((__always_inline__))
_mm_storer_pd (double *__P, __m128d __A)
{
  _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,1)));
}
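
/* Usage sketch (editorial illustration, not part of the upstream header):
   the plain load/store forms require 16-byte alignment, the *u* forms
   accept any address at some cost in speed:

     double buf[2] __attribute__ ((__aligned__ (16))) = { 1.0, 2.0 };
     __m128d v = _mm_load_pd (buf);             aligned load
     _mm_storeu_pd (buf, _mm_add_pd (v, v));    unaligned form is legal anywhere
*/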

static __inline int __attribute__((__always_inline__))
_mm_cvtsi128_si32 (__m128i __A)
{
  return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0);
}

#ifdef __x86_64__
/* Intel intrinsic.  */
static __inline long long __attribute__((__always_inline__))
_mm_cvtsi128_si64 (__m128i __A)
{
  return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0);
}

/* Microsoft intrinsic.  */
static __inline long long __attribute__((__always_inline__))
_mm_cvtsi128_si64x (__m128i __A)
{
  return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0);
}
#endif

static __inline __m128d __attribute__((__always_inline__))
_mm_add_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_addpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_add_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_sub_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_subpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_sub_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_mul_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_mulpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_mul_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_div_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_divpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_div_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B);
}
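
/* Usage sketch (editorial illustration, not part of the upstream header):
   the _pd forms operate on both elements, while the _sd forms operate on
   element 0 only and copy element 1 from the first operand:

     __m128d a = _mm_set_pd (10.0, 1.0);
     __m128d b = _mm_set_pd (20.0, 2.0);
     __m128d p = _mm_add_pd (a, b);   p = { 3.0, 30.0 }
     __m128d s = _mm_add_sd (a, b);   s = { 3.0, 10.0 }
*/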

static __inline __m128d __attribute__((__always_inline__))
_mm_sqrt_pd (__m128d __A)
{
  return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A);
}

/* Return pair {sqrt (B[0]), A[1]}.  */
static __inline __m128d __attribute__((__always_inline__))
_mm_sqrt_sd (__m128d __A, __m128d __B)
{
  __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
  return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_min_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_min_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_max_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_max_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_and_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_andnot_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_or_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_xor_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_cmpeq_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_cmplt_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_cmple_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_cmpgt_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_cmpge_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_cmpneq_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_cmpnlt_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_cmpnle_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_cmpngt_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_cmpnge_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_cmpord_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_cmpunord_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B);
}
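
/* Usage sketch (editorial illustration, not part of the upstream header):
   each packed comparison yields an all-ones or all-zeros bit pattern per
   element, which combines with the logical operations above to select
   values without branching (_mm_min_pd already exists; this only
   illustrates the mask idiom):

     __m128d a = _mm_set_pd (1.0, 8.0);
     __m128d b = _mm_set_pd (2.0, 4.0);
     __m128d mask = _mm_cmplt_pd (a, b);
     __m128d min  = _mm_or_pd (_mm_and_pd (mask, a),
                               _mm_andnot_pd (mask, b));   min = { 4.0, 1.0 }
*/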

static __inline __m128d __attribute__((__always_inline__))
_mm_cmpeq_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_cmplt_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_cmple_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_cmpgt_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
                                         (__v2df)
                                         __builtin_ia32_cmpltsd ((__v2df) __B,
                                                                 (__v2df)
                                                                 __A));
}

static __inline __m128d __attribute__((__always_inline__))
_mm_cmpge_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
                                         (__v2df)
                                         __builtin_ia32_cmplesd ((__v2df) __B,
                                                                 (__v2df)
                                                                 __A));
}

static __inline __m128d __attribute__((__always_inline__))
_mm_cmpneq_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_cmpnlt_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_cmpnle_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_cmpngt_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
                                         (__v2df)
                                         __builtin_ia32_cmpnltsd ((__v2df) __B,
                                                                  (__v2df)
                                                                  __A));
}

static __inline __m128d __attribute__((__always_inline__))
_mm_cmpnge_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
                                         (__v2df)
                                         __builtin_ia32_cmpnlesd ((__v2df) __B,
                                                                  (__v2df)
                                                                  __A));
}

static __inline __m128d __attribute__((__always_inline__))
_mm_cmpord_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_cmpunord_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B);
}

static __inline int __attribute__((__always_inline__))
_mm_comieq_sd (__m128d __A, __m128d __B)
{
  return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B);
}

static __inline int __attribute__((__always_inline__))
_mm_comilt_sd (__m128d __A, __m128d __B)
{
  return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B);
}

static __inline int __attribute__((__always_inline__))
_mm_comile_sd (__m128d __A, __m128d __B)
{
  return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B);
}

static __inline int __attribute__((__always_inline__))
_mm_comigt_sd (__m128d __A, __m128d __B)
{
  return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B);
}

static __inline int __attribute__((__always_inline__))
_mm_comige_sd (__m128d __A, __m128d __B)
{
  return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B);
}

static __inline int __attribute__((__always_inline__))
_mm_comineq_sd (__m128d __A, __m128d __B)
{
  return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B);
}

static __inline int __attribute__((__always_inline__))
_mm_ucomieq_sd (__m128d __A, __m128d __B)
{
  return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B);
}

static __inline int __attribute__((__always_inline__))
_mm_ucomilt_sd (__m128d __A, __m128d __B)
{
  return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B);
}

static __inline int __attribute__((__always_inline__))
_mm_ucomile_sd (__m128d __A, __m128d __B)
{
  return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B);
}

static __inline int __attribute__((__always_inline__))
_mm_ucomigt_sd (__m128d __A, __m128d __B)
{
  return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B);
}

static __inline int __attribute__((__always_inline__))
_mm_ucomige_sd (__m128d __A, __m128d __B)
{
  return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B);
}

static __inline int __attribute__((__always_inline__))
_mm_ucomineq_sd (__m128d __A, __m128d __B)
{
  return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B);
}
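
/* Usage sketch (editorial illustration, not part of the upstream header):
   unlike the _mm_cmp*_sd mask forms above, the comi/ucomi intrinsics
   compare only the low elements and return a plain int; comi raises an
   invalid-operation exception on any NaN operand, ucomi only on a
   signaling NaN:

     __m128d a = _mm_set_sd (1.0);
     __m128d b = _mm_set_sd (2.0);
     int lt = _mm_comilt_sd (a, b);   lt == 1
*/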

/* Create a vector of Qi, where i is the element number.  */

static __inline __m128i __attribute__((__always_inline__))
_mm_set_epi64x (long long __q1, long long __q0)
{
  return __extension__ (__m128i)(__v2di){ __q0, __q1 };
}

static __inline __m128i __attribute__((__always_inline__))
_mm_set_epi64 (__m64 __q1,  __m64 __q0)
{
  return _mm_set_epi64x ((long long)__q1, (long long)__q0);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_set_epi32 (int __q3, int __q2, int __q1, int __q0)
{
  return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 };
}

static __inline __m128i __attribute__((__always_inline__))
_mm_set_epi16 (short __q7, short __q6, short __q5, short __q4,
               short __q3, short __q2, short __q1, short __q0)
{
  return __extension__ (__m128i)(__v8hi){
    __q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 };
}

static __inline __m128i __attribute__((__always_inline__))
_mm_set_epi8 (char __q15, char __q14, char __q13, char __q12,
              char __q11, char __q10, char __q09, char __q08,
              char __q07, char __q06, char __q05, char __q04,
              char __q03, char __q02, char __q01, char __q00)
{
  return __extension__ (__m128i)(__v16qi){
    __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
    __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15
  };
}

/* Set all of the elements of the vector to A.  */

static __inline __m128i __attribute__((__always_inline__))
_mm_set1_epi64x (long long __A)
{
  return _mm_set_epi64x (__A, __A);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_set1_epi64 (__m64 __A)
{
  return _mm_set_epi64 (__A, __A);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_set1_epi32 (int __A)
{
  return _mm_set_epi32 (__A, __A, __A, __A);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_set1_epi16 (short __A)
{
  return _mm_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_set1_epi8 (char __A)
{
  return _mm_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
                       __A, __A, __A, __A, __A, __A, __A, __A);
}

/* Create a vector of Qi, where i is the element number.
   The parameter order is reversed from the _mm_set_epi* functions.  */

static __inline __m128i __attribute__((__always_inline__))
_mm_setr_epi64 (__m64 __q0, __m64 __q1)
{
  return _mm_set_epi64 (__q1, __q0);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_setr_epi32 (int __q0, int __q1, int __q2, int __q3)
{
  return _mm_set_epi32 (__q3, __q2, __q1, __q0);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_setr_epi16 (short __q0, short __q1, short __q2, short __q3,
                short __q4, short __q5, short __q6, short __q7)
{
  return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_setr_epi8 (char __q00, char __q01, char __q02, char __q03,
               char __q04, char __q05, char __q06, char __q07,
               char __q08, char __q09, char __q10, char __q11,
               char __q12, char __q13, char __q14, char __q15)
{
  return _mm_set_epi8 (__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08,
                       __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00);
}
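
/* Usage sketch (editorial illustration, not part of the upstream header):
   as with the _pd setters, _mm_set_epi* takes arguments high element
   first and _mm_setr_epi* low element first:

     __m128i v1 = _mm_set_epi32 (3, 2, 1, 0);    element 0 = 0, ..., element 3 = 3
     __m128i v2 = _mm_setr_epi32 (0, 1, 2, 3);   same layout as v1
*/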

/* Create a vector with element 0 as *P and the rest zero.  */

static __inline __m128i __attribute__((__always_inline__))
_mm_load_si128 (__m128i const *__P)
{
  return *__P;
}

static __inline __m128i __attribute__((__always_inline__))
_mm_loadu_si128 (__m128i const *__P)
{
  return (__m128i) __builtin_ia32_loaddqu ((char const *)__P);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_loadl_epi64 (__m128i const *__P)
{
  return _mm_set_epi64 ((__m64)0LL, *(__m64 *)__P);
}

static __inline void __attribute__((__always_inline__))
_mm_store_si128 (__m128i *__P, __m128i __B)
{
  *__P = __B;
}

static __inline void __attribute__((__always_inline__))
_mm_storeu_si128 (__m128i *__P, __m128i __B)
{
  __builtin_ia32_storedqu ((char *)__P, (__v16qi)__B);
}

static __inline void __attribute__((__always_inline__))
_mm_storel_epi64 (__m128i *__P, __m128i __B)
{
  *(long long *)__P = __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0);
}

static __inline __m64 __attribute__((__always_inline__))
_mm_movepi64_pi64 (__m128i __B)
{
  return (__m64) __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_movpi64_epi64 (__m64 __A)
{
  return _mm_set_epi64 ((__m64)0LL, __A);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_move_epi64 (__m128i __A)
{
  return _mm_set_epi64 ((__m64)0LL, _mm_movepi64_pi64 (__A));
}

/* Create a vector of zeros.  */
static __inline __m128i __attribute__((__always_inline__))
_mm_setzero_si128 (void)
{
  return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 };
}

static __inline __m128d __attribute__((__always_inline__))
_mm_cvtepi32_pd (__m128i __A)
{
  return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A);
}

static __inline __m128 __attribute__((__always_inline__))
_mm_cvtepi32_ps (__m128i __A)
{
  return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_cvtpd_epi32 (__m128d __A)
{
  return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A);
}

static __inline __m64 __attribute__((__always_inline__))
_mm_cvtpd_pi32 (__m128d __A)
{
  return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A);
}

static __inline __m128 __attribute__((__always_inline__))
_mm_cvtpd_ps (__m128d __A)
{
  return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_cvttpd_epi32 (__m128d __A)
{
  return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A);
}

static __inline __m64 __attribute__((__always_inline__))
_mm_cvttpd_pi32 (__m128d __A)
{
  return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_cvtpi32_pd (__m64 __A)
{
  return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_cvtps_epi32 (__m128 __A)
{
  return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_cvttps_epi32 (__m128 __A)
{
  return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_cvtps_pd (__m128 __A)
{
  return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A);
}

static __inline int __attribute__((__always_inline__))
_mm_cvtsd_si32 (__m128d __A)
{
  return __builtin_ia32_cvtsd2si ((__v2df) __A);
}

#ifdef __x86_64__
/* Intel intrinsic.  */
static __inline long long __attribute__((__always_inline__))
_mm_cvtsd_si64 (__m128d __A)
{
  return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
}

/* Microsoft intrinsic.  */
static __inline long long __attribute__((__always_inline__))
_mm_cvtsd_si64x (__m128d __A)
{
  return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
}
#endif

static __inline int __attribute__((__always_inline__))
_mm_cvttsd_si32 (__m128d __A)
{
  return __builtin_ia32_cvttsd2si ((__v2df) __A);
}

#ifdef __x86_64__
/* Intel intrinsic.  */
static __inline long long __attribute__((__always_inline__))
_mm_cvttsd_si64 (__m128d __A)
{
  return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
}

/* Microsoft intrinsic.  */
static __inline long long __attribute__((__always_inline__))
_mm_cvttsd_si64x (__m128d __A)
{
  return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
}
#endif

static __inline __m128 __attribute__((__always_inline__))
_mm_cvtsd_ss (__m128 __A, __m128d __B)
{
  return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_cvtsi32_sd (__m128d __A, int __B)
{
  return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
}

#ifdef __x86_64__
/* Intel intrinsic.  */
static __inline __m128d __attribute__((__always_inline__))
_mm_cvtsi64_sd (__m128d __A, long long __B)
{
  return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
}

/* Microsoft intrinsic.  */
static __inline __m128d __attribute__((__always_inline__))
_mm_cvtsi64x_sd (__m128d __A, long long __B)
{
  return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
}
#endif

static __inline __m128d __attribute__((__always_inline__))
_mm_cvtss_sd (__m128d __A, __m128 __B)
{
  return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B);
}
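
/* Usage sketch (editorial illustration, not part of the upstream header):
   the cvt forms round according to the current rounding mode (round to
   nearest even by default), while the cvtt forms always truncate toward
   zero:

     __m128d v = _mm_set_pd (-1.5, 1.5);
     __m128i r = _mm_cvtpd_epi32 (v);    r = { 2, -2, 0, 0 }
     __m128i t = _mm_cvttpd_epi32 (v);   t = { 1, -1, 0, 0 }
*/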

#define _mm_shuffle_pd(__A, __B, __C) ((__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, (__C)))

static __inline __m128d __attribute__((__always_inline__))
_mm_unpackhi_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_unpacklo_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_loadh_pd (__m128d __A, double const *__B)
{
  return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
}

static __inline __m128d __attribute__((__always_inline__))
_mm_loadl_pd (__m128d __A, double const *__B)
{
  return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
}

static __inline int __attribute__((__always_inline__))
_mm_movemask_pd (__m128d __A)
{
  return __builtin_ia32_movmskpd ((__v2df)__A);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_packs_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_packs_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_packus_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_unpackhi_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_unpackhi_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_unpackhi_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_unpackhi_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_unpacklo_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_unpacklo_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_unpacklo_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_unpacklo_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_add_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_paddb128 ((__v16qi)__A, (__v16qi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_add_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_paddw128 ((__v8hi)__A, (__v8hi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_add_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_paddd128 ((__v4si)__A, (__v4si)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_add_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_paddq128 ((__v2di)__A, (__v2di)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_adds_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_adds_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_adds_epu8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_adds_epu16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_sub_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psubb128 ((__v16qi)__A, (__v16qi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_sub_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psubw128 ((__v8hi)__A, (__v8hi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_sub_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psubd128 ((__v4si)__A, (__v4si)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_sub_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psubq128 ((__v2di)__A, (__v2di)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_subs_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_subs_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_subs_epu8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_subs_epu16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_madd_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_mulhi_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_mullo_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pmullw128 ((__v8hi)__A, (__v8hi)__B);
}

static __inline __m64 __attribute__((__always_inline__))
_mm_mul_su32 (__m64 __A, __m64 __B)
{
  return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_mul_epu32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B);
}
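
/* Usage sketch (editorial illustration, not part of the upstream header):
   _mm_madd_epi16 multiplies corresponding signed 16-bit elements and adds
   adjacent pairs of the 32-bit products, the building block of fixed-point
   dot products:

     __m128i x = _mm_set1_epi16 (3);
     __m128i y = _mm_set1_epi16 (4);
     __m128i d = _mm_madd_epi16 (x, y);   each 32-bit element = 3*4 + 3*4 = 24
*/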

#if 0
static __inline __m128i __attribute__((__always_inline__))
_mm_slli_epi16 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_slli_epi32 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_slli_epi64 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
}
#else
#define _mm_slli_epi16(__A, __B) \
  ((__m128i)__builtin_ia32_psllwi128 ((__v8hi)(__A), __B))
#define _mm_slli_epi32(__A, __B) \
  ((__m128i)__builtin_ia32_pslldi128 ((__v4si)(__A), __B))
#define _mm_slli_epi64(__A, __B) \
  ((__m128i)__builtin_ia32_psllqi128 ((__v2di)(__A), __B))
#endif

#if 0
static __inline __m128i __attribute__((__always_inline__))
_mm_srai_epi16 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_srai_epi32 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
}
#else
#define _mm_srai_epi16(__A, __B) \
  ((__m128i)__builtin_ia32_psrawi128 ((__v8hi)(__A), __B))
#define _mm_srai_epi32(__A, __B) \
  ((__m128i)__builtin_ia32_psradi128 ((__v4si)(__A), __B))
#endif

#if 0
static __m128i __attribute__((__always_inline__))
_mm_srli_si128 (__m128i __A, int __B)
{
  return ((__m128i)__builtin_ia32_psrldqi128 (__A, __B * 8));
}

static __m128i __attribute__((__always_inline__))
_mm_slli_si128 (__m128i __A, int __B)
{
  return ((__m128i)__builtin_ia32_pslldqi128 (__A, __B * 8));
}
#else
#define _mm_srli_si128(__A, __B) \
  ((__m128i)__builtin_ia32_psrldqi128 (__A, (__B) * 8))
#define _mm_slli_si128(__A, __B) \
  ((__m128i)__builtin_ia32_pslldqi128 (__A, (__B) * 8))
#endif

#if 0
static __inline __m128i __attribute__((__always_inline__))
_mm_srli_epi16 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_srli_epi32 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_srli_epi64 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
}
#else
#define _mm_srli_epi16(__A, __B) \
  ((__m128i)__builtin_ia32_psrlwi128 ((__v8hi)(__A), __B))
#define _mm_srli_epi32(__A, __B) \
  ((__m128i)__builtin_ia32_psrldi128 ((__v4si)(__A), __B))
#define _mm_srli_epi64(__A, __B) \
  ((__m128i)__builtin_ia32_psrlqi128 ((__v2di)(__A), __B))
#endif
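
/* Usage sketch (editorial illustration, not part of the upstream header):
   the element shifts take a bit count, while _mm_slli_si128 and
   _mm_srli_si128 shift the whole register by a byte count:

     __m128i v  = _mm_set1_epi32 (1);
     __m128i b4 = _mm_slli_epi32 (v, 4);   each element becomes 16
     __m128i s4 = _mm_slli_si128 (v, 4);   { 0, 1, 1, 1 } viewed as epi32
*/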

static __inline __m128i __attribute__((__always_inline__))
_mm_sll_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_sll_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_sll_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_sra_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_sra_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_srl_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_srl_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_srl_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_and_si128 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_andnot_si128 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_or_si128 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_xor_si128 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pxor128 ((__v2di)__A, (__v2di)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_cmpeq_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pcmpeqb128 ((__v16qi)__A, (__v16qi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_cmpeq_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pcmpeqw128 ((__v8hi)__A, (__v8hi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_cmpeq_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pcmpeqd128 ((__v4si)__A, (__v4si)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_cmplt_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__B, (__v16qi)__A);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_cmplt_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__B, (__v8hi)__A);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_cmplt_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__B, (__v4si)__A);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_cmpgt_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__A, (__v16qi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_cmpgt_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__A, (__v8hi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_cmpgt_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__A, (__v4si)__B);
}

#if 0
static __inline int __attribute__((__always_inline__))
_mm_extract_epi16 (__m128i const __A, int const __N)
{
  return __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_insert_epi16 (__m128i const __A, int const __D, int const __N)
{
  return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)__A, __D, __N);
}
#else
#define _mm_extract_epi16(A, N) \
  ((int) __builtin_ia32_vec_ext_v8hi ((__v8hi)(A), (N)))
#define _mm_insert_epi16(A, D, N) \
  ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(A), (D), (N)))
#endif
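
/* Usage sketch (editorial illustration, not part of the upstream header):
   the element index must be a compile-time constant in the macro forms:

     __m128i v  = _mm_set_epi16 (7, 6, 5, 4, 3, 2, 1, 0);
     int     e3 = _mm_extract_epi16 (v, 3);       e3 == 3
     __m128i w  = _mm_insert_epi16 (v, 42, 3);    element 3 replaced by 42
*/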

static __inline __m128i __attribute__((__always_inline__))
_mm_max_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_max_epu8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_min_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_min_epu8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B);
}

static __inline int __attribute__((__always_inline__))
_mm_movemask_epi8 (__m128i __A)
{
  return __builtin_ia32_pmovmskb128 ((__v16qi)__A);
}
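
/* Usage sketch (editorial illustration, not part of the upstream header):
   a byte compare followed by _mm_movemask_epi8 packs the per-byte results
   into an ordinary bitmask; __p below is a hypothetical pointer to at
   least 16 readable bytes:

     __m128i data = _mm_loadu_si128 ((__m128i const *) __p);
     __m128i eq   = _mm_cmpeq_epi8 (data, _mm_set1_epi8 ('\n'));
     int     m    = _mm_movemask_epi8 (eq);   bit i is set iff byte i == '\n'
*/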

static __inline __m128i __attribute__((__always_inline__))
_mm_mulhi_epu16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B);
}

#define _mm_shufflehi_epi16(__A, __B) ((__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __B))
#define _mm_shufflelo_epi16(__A, __B) ((__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __B))
#define _mm_shuffle_epi32(__A, __B) ((__m128i)__builtin_ia32_pshufd ((__v4si)__A, __B))

static __inline void __attribute__((__always_inline__))
_mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C)
{
  __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_avg_epu8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_avg_epu16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_sad_epu8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B);
}

static __inline void __attribute__((__always_inline__))
_mm_stream_si32 (int *__A, int __B)
{
  __builtin_ia32_movnti (__A, __B);
}

static __inline void __attribute__((__always_inline__))
_mm_stream_si128 (__m128i *__A, __m128i __B)
{
  __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B);
}

static __inline void __attribute__((__always_inline__))
_mm_stream_pd (double *__A, __m128d __B)
{
  __builtin_ia32_movntpd (__A, (__v2df)__B);
}

static __inline void __attribute__((__always_inline__))
_mm_clflush (void const *__A)
{
  __builtin_ia32_clflush (__A);
}

static __inline void __attribute__((__always_inline__))
_mm_lfence (void)
{
  __builtin_ia32_lfence ();
}

static __inline void __attribute__((__always_inline__))
_mm_mfence (void)
{
  __builtin_ia32_mfence ();
}
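
/* Usage sketch (editorial illustration, not part of the upstream header):
   the _mm_stream_* stores bypass the cache; a fence makes them globally
   visible before later ordinary stores are observed.  __dst below is a
   hypothetical 16-byte aligned __m128i pointer:

     _mm_stream_si128 (__dst, _mm_setzero_si128 ());
     _mm_mfence ();   order the non-temporal store
*/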

static __inline __m128i __attribute__((__always_inline__))
_mm_cvtsi32_si128 (int __A)
{
  return _mm_set_epi32 (0, 0, 0, __A);
}

#ifdef __x86_64__
/* Intel intrinsic.  */
static __inline __m128i __attribute__((__always_inline__))
_mm_cvtsi64_si128 (long long __A)
{
  return _mm_set_epi64x (0, __A);
}

/* Microsoft intrinsic.  */
static __inline __m128i __attribute__((__always_inline__))
_mm_cvtsi64x_si128 (long long __A)
{
  return _mm_set_epi64x (0, __A);
}
#endif

/* Casts between various SP, DP, INT vector types.  Note that these do no
   conversion of values, they just change the type.  */
static __inline __m128 __attribute__((__always_inline__))
_mm_castpd_ps(__m128d __A)
{
  return (__m128) __A;
}

static __inline __m128i __attribute__((__always_inline__))
_mm_castpd_si128(__m128d __A)
{
  return (__m128i) __A;
}

static __inline __m128d __attribute__((__always_inline__))
_mm_castps_pd(__m128 __A)
{
  return (__m128d) __A;
}

static __inline __m128i __attribute__((__always_inline__))
_mm_castps_si128(__m128 __A)
{
  return (__m128i) __A;
}

static __inline __m128 __attribute__((__always_inline__))
_mm_castsi128_ps(__m128i __A)
{
  return (__m128) __A;
}

static __inline __m128d __attribute__((__always_inline__))
_mm_castsi128_pd(__m128i __A)
{
  return (__m128d) __A;
}
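
/* Usage sketch (editorial illustration, not part of the upstream header):
   the casts reinterpret the 128 bits without generating code, e.g. to
   apply an integer-built mask to doubles (clearing each sign bit):

     __m128d v = _mm_set_pd (-2.0, -1.0);
     __m128i m = _mm_set_epi32 (0x7fffffff, -1, 0x7fffffff, -1);
     __m128d a = _mm_and_pd (_mm_castsi128_pd (m), v);   a = { 1.0, 2.0 }
*/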

#endif /* __SSE2__  */

#endif /* _EMMINTRIN_H_INCLUDED */
