OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-old/] [gcc-4.2.2/] [gcc/] [config/] [i386/] [mmintrin.h] - Blame information for rev 154

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 38 julius
/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007
2
   Free Software Foundation, Inc.
3
 
4
   This file is part of GCC.
5
 
6
   GCC is free software; you can redistribute it and/or modify
7
   it under the terms of the GNU General Public License as published by
8
   the Free Software Foundation; either version 2, or (at your option)
9
   any later version.
10
 
11
   GCC is distributed in the hope that it will be useful,
12
   but WITHOUT ANY WARRANTY; without even the implied warranty of
13
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
   GNU General Public License for more details.
15
 
16
   You should have received a copy of the GNU General Public License
17
   along with GCC; see the file COPYING.  If not, write to
18
   the Free Software Foundation, 51 Franklin Street, Fifth Floor,
19
   Boston, MA 02110-1301, USA.  */
20
 
21
/* As a special exception, if you include this header file into source
22
   files compiled by GCC, this header file does not by itself cause
23
   the resulting executable to be covered by the GNU General Public
24
   License.  This exception does not however invalidate any other
25
   reasons why the executable file might be covered by the GNU General
26
   Public License.  */
27
 
28
/* Implemented from the specification included in the Intel C++ Compiler
29
   User Guide and Reference, version 9.0.  */
30
 
31
#ifndef _MMINTRIN_H_INCLUDED
32
#define _MMINTRIN_H_INCLUDED
33
 
34
#ifndef __MMX__
35
# error "MMX instruction set not enabled"
36
#else
37
/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  Hence __may_alias__ on the
   public 8-byte vector type.  */
typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
40
 
41
/* Internal data types for implementing the intrinsics.  Each is an
   8-byte vector; they differ only in element type (int, short, char).  */
typedef int __v2si __attribute__ ((__vector_size__ (8)));
typedef short __v4hi __attribute__ ((__vector_size__ (8)));
typedef char __v8qi __attribute__ ((__vector_size__ (8)));
45
 
46
/* Empty the multimedia state (issues the emms builtin).  */
static __inline void __attribute__((__always_inline__))
_mm_empty (void)
{
  __builtin_ia32_emms ();
}
52
 
53
/* Alternate name for _mm_empty.  */
static __inline void __attribute__((__always_inline__))
_m_empty (void)
{
  _mm_empty ();
}
58
 
59
/* Convert I to a __m64 object.  The integer is zero-extended to 64-bits.  */
60
static __inline __m64  __attribute__((__always_inline__))
61
_mm_cvtsi32_si64 (int __i)
62
{
63
  return (__m64) __builtin_ia32_vec_init_v2si (__i, 0);
64
}
65
 
66
static __inline __m64  __attribute__((__always_inline__))
67
_m_from_int (int __i)
68
{
69
  return _mm_cvtsi32_si64 (__i);
70
}
71
 
72
#ifdef __x86_64__
73
/* Convert I to a __m64 object.  */
74
 
75
/* Intel intrinsic.  */
76
static __inline __m64  __attribute__((__always_inline__))
77
_m_from_int64 (long long __i)
78
{
79
  return (__m64) __i;
80
}
81
 
82
static __inline __m64  __attribute__((__always_inline__))
83
_mm_cvtsi64_m64 (long long __i)
84
{
85
  return (__m64) __i;
86
}
87
 
88
/* Microsoft intrinsic.  */
89
static __inline __m64  __attribute__((__always_inline__))
90
_mm_cvtsi64x_si64 (long long __i)
91
{
92
  return (__m64) __i;
93
}
94
 
95
static __inline __m64  __attribute__((__always_inline__))
96
_mm_set_pi64x (long long __i)
97
{
98
  return (__m64) __i;
99
}
100
#endif
101
 
102
/* Convert the lower 32 bits of the __m64 object into an integer.  */
103
static __inline int __attribute__((__always_inline__))
104
_mm_cvtsi64_si32 (__m64 __i)
105
{
106
  return __builtin_ia32_vec_ext_v2si ((__v2si)__i, 0);
107
}
108
 
109
static __inline int __attribute__((__always_inline__))
110
_m_to_int (__m64 __i)
111
{
112
  return _mm_cvtsi64_si32 (__i);
113
}
114
 
115
#ifdef __x86_64__
116
/* Convert the __m64 object to a 64bit integer.  */
117
 
118
/* Intel intrinsic.  */
119
static __inline long long __attribute__((__always_inline__))
120
_m_to_int64 (__m64 __i)
121
{
122
  return (long long)__i;
123
}
124
 
125
static __inline long long __attribute__((__always_inline__))
126
_mm_cvtm64_si64 (__m64 __i)
127
{
128
  return (long long)__i;
129
}
130
 
131
/* Microsoft intrinsic.  */
132
static __inline long long __attribute__((__always_inline__))
133
_mm_cvtsi64_si64x (__m64 __i)
134
{
135
  return (long long)__i;
136
}
137
#endif
138
 
139
/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
140
   the result, and the four 16-bit values from M2 into the upper four 8-bit
141
   values of the result, all with signed saturation.  */
142
static __inline __m64 __attribute__((__always_inline__))
143
_mm_packs_pi16 (__m64 __m1, __m64 __m2)
144
{
145
  return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2);
146
}
147
 
148
static __inline __m64 __attribute__((__always_inline__))
149
_m_packsswb (__m64 __m1, __m64 __m2)
150
{
151
  return _mm_packs_pi16 (__m1, __m2);
152
}
153
 
154
/* Pack the two 32-bit values from M1 in to the lower two 16-bit values of
155
   the result, and the two 32-bit values from M2 into the upper two 16-bit
156
   values of the result, all with signed saturation.  */
157
static __inline __m64 __attribute__((__always_inline__))
158
_mm_packs_pi32 (__m64 __m1, __m64 __m2)
159
{
160
  return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2);
161
}
162
 
163
static __inline __m64 __attribute__((__always_inline__))
164
_m_packssdw (__m64 __m1, __m64 __m2)
165
{
166
  return _mm_packs_pi32 (__m1, __m2);
167
}
168
 
169
/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
170
   the result, and the four 16-bit values from M2 into the upper four 8-bit
171
   values of the result, all with unsigned saturation.  */
172
static __inline __m64 __attribute__((__always_inline__))
173
_mm_packs_pu16 (__m64 __m1, __m64 __m2)
174
{
175
  return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2);
176
}
177
 
178
static __inline __m64 __attribute__((__always_inline__))
179
_m_packuswb (__m64 __m1, __m64 __m2)
180
{
181
  return _mm_packs_pu16 (__m1, __m2);
182
}
183
 
184
/* Interleave the four 8-bit values from the high half of M1 with the four
185
   8-bit values from the high half of M2.  */
186
static __inline __m64 __attribute__((__always_inline__))
187
_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
188
{
189
  return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2);
190
}
191
 
192
static __inline __m64 __attribute__((__always_inline__))
193
_m_punpckhbw (__m64 __m1, __m64 __m2)
194
{
195
  return _mm_unpackhi_pi8 (__m1, __m2);
196
}
197
 
198
/* Interleave the two 16-bit values from the high half of M1 with the two
199
   16-bit values from the high half of M2.  */
200
static __inline __m64 __attribute__((__always_inline__))
201
_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
202
{
203
  return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2);
204
}
205
 
206
static __inline __m64 __attribute__((__always_inline__))
207
_m_punpckhwd (__m64 __m1, __m64 __m2)
208
{
209
  return _mm_unpackhi_pi16 (__m1, __m2);
210
}
211
 
212
/* Interleave the 32-bit value from the high half of M1 with the 32-bit
213
   value from the high half of M2.  */
214
static __inline __m64 __attribute__((__always_inline__))
215
_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
216
{
217
  return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2);
218
}
219
 
220
static __inline __m64 __attribute__((__always_inline__))
221
_m_punpckhdq (__m64 __m1, __m64 __m2)
222
{
223
  return _mm_unpackhi_pi32 (__m1, __m2);
224
}
225
 
226
/* Interleave the four 8-bit values from the low half of M1 with the four
227
   8-bit values from the low half of M2.  */
228
static __inline __m64 __attribute__((__always_inline__))
229
_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
230
{
231
  return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2);
232
}
233
 
234
static __inline __m64 __attribute__((__always_inline__))
235
_m_punpcklbw (__m64 __m1, __m64 __m2)
236
{
237
  return _mm_unpacklo_pi8 (__m1, __m2);
238
}
239
 
240
/* Interleave the two 16-bit values from the low half of M1 with the two
241
   16-bit values from the low half of M2.  */
242
static __inline __m64 __attribute__((__always_inline__))
243
_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
244
{
245
  return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2);
246
}
247
 
248
static __inline __m64 __attribute__((__always_inline__))
249
_m_punpcklwd (__m64 __m1, __m64 __m2)
250
{
251
  return _mm_unpacklo_pi16 (__m1, __m2);
252
}
253
 
254
/* Interleave the 32-bit value from the low half of M1 with the 32-bit
255
   value from the low half of M2.  */
256
static __inline __m64 __attribute__((__always_inline__))
257
_mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
258
{
259
  return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2);
260
}
261
 
262
static __inline __m64 __attribute__((__always_inline__))
263
_m_punpckldq (__m64 __m1, __m64 __m2)
264
{
265
  return _mm_unpacklo_pi32 (__m1, __m2);
266
}
267
 
268
/* Add the 8-bit values in M1 to the 8-bit values in M2.  */
269
static __inline __m64 __attribute__((__always_inline__))
270
_mm_add_pi8 (__m64 __m1, __m64 __m2)
271
{
272
  return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2);
273
}
274
 
275
static __inline __m64 __attribute__((__always_inline__))
276
_m_paddb (__m64 __m1, __m64 __m2)
277
{
278
  return _mm_add_pi8 (__m1, __m2);
279
}
280
 
281
/* Add the 16-bit values in M1 to the 16-bit values in M2.  */
282
static __inline __m64 __attribute__((__always_inline__))
283
_mm_add_pi16 (__m64 __m1, __m64 __m2)
284
{
285
  return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2);
286
}
287
 
288
static __inline __m64 __attribute__((__always_inline__))
289
_m_paddw (__m64 __m1, __m64 __m2)
290
{
291
  return _mm_add_pi16 (__m1, __m2);
292
}
293
 
294
/* Add the 32-bit values in M1 to the 32-bit values in M2.  */
295
static __inline __m64 __attribute__((__always_inline__))
296
_mm_add_pi32 (__m64 __m1, __m64 __m2)
297
{
298
  return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
299
}
300
 
301
static __inline __m64 __attribute__((__always_inline__))
302
_m_paddd (__m64 __m1, __m64 __m2)
303
{
304
  return _mm_add_pi32 (__m1, __m2);
305
}
306
 
307
/* Add the 64-bit values in M1 to the 64-bit values in M2.  */
308
#ifdef __SSE2__
309
static __inline __m64 __attribute__((__always_inline__))
310
_mm_add_si64 (__m64 __m1, __m64 __m2)
311
{
312
  return (__m64) __builtin_ia32_paddq ((long long)__m1, (long long)__m2);
313
}
314
#endif
315
 
316
/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
317
   saturated arithmetic.  */
318
static __inline __m64 __attribute__((__always_inline__))
319
_mm_adds_pi8 (__m64 __m1, __m64 __m2)
320
{
321
  return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2);
322
}
323
 
324
static __inline __m64 __attribute__((__always_inline__))
325
_m_paddsb (__m64 __m1, __m64 __m2)
326
{
327
  return _mm_adds_pi8 (__m1, __m2);
328
}
329
 
330
/* Add the 16-bit values in M1 to the 16-bit values in M2 using signed
331
   saturated arithmetic.  */
332
static __inline __m64 __attribute__((__always_inline__))
333
_mm_adds_pi16 (__m64 __m1, __m64 __m2)
334
{
335
  return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2);
336
}
337
 
338
static __inline __m64 __attribute__((__always_inline__))
339
_m_paddsw (__m64 __m1, __m64 __m2)
340
{
341
  return _mm_adds_pi16 (__m1, __m2);
342
}
343
 
344
/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
345
   saturated arithmetic.  */
346
static __inline __m64 __attribute__((__always_inline__))
347
_mm_adds_pu8 (__m64 __m1, __m64 __m2)
348
{
349
  return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2);
350
}
351
 
352
static __inline __m64 __attribute__((__always_inline__))
353
_m_paddusb (__m64 __m1, __m64 __m2)
354
{
355
  return _mm_adds_pu8 (__m1, __m2);
356
}
357
 
358
/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
359
   saturated arithmetic.  */
360
static __inline __m64 __attribute__((__always_inline__))
361
_mm_adds_pu16 (__m64 __m1, __m64 __m2)
362
{
363
  return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2);
364
}
365
 
366
static __inline __m64 __attribute__((__always_inline__))
367
_m_paddusw (__m64 __m1, __m64 __m2)
368
{
369
  return _mm_adds_pu16 (__m1, __m2);
370
}
371
 
372
/* Subtract the 8-bit values in M2 from the 8-bit values in M1.  */
373
static __inline __m64 __attribute__((__always_inline__))
374
_mm_sub_pi8 (__m64 __m1, __m64 __m2)
375
{
376
  return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2);
377
}
378
 
379
static __inline __m64 __attribute__((__always_inline__))
380
_m_psubb (__m64 __m1, __m64 __m2)
381
{
382
  return _mm_sub_pi8 (__m1, __m2);
383
}
384
 
385
/* Subtract the 16-bit values in M2 from the 16-bit values in M1.  */
386
static __inline __m64 __attribute__((__always_inline__))
387
_mm_sub_pi16 (__m64 __m1, __m64 __m2)
388
{
389
  return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2);
390
}
391
 
392
static __inline __m64 __attribute__((__always_inline__))
393
_m_psubw (__m64 __m1, __m64 __m2)
394
{
395
  return _mm_sub_pi16 (__m1, __m2);
396
}
397
 
398
/* Subtract the 32-bit values in M2 from the 32-bit values in M1.  */
399
static __inline __m64 __attribute__((__always_inline__))
400
_mm_sub_pi32 (__m64 __m1, __m64 __m2)
401
{
402
  return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
403
}
404
 
405
static __inline __m64 __attribute__((__always_inline__))
406
_m_psubd (__m64 __m1, __m64 __m2)
407
{
408
  return _mm_sub_pi32 (__m1, __m2);
409
}
410
 
411
/* Add the 64-bit values in M1 to the 64-bit values in M2.  */
412
#ifdef __SSE2__
413
static __inline __m64 __attribute__((__always_inline__))
414
_mm_sub_si64 (__m64 __m1, __m64 __m2)
415
{
416
  return (__m64) __builtin_ia32_psubq ((long long)__m1, (long long)__m2);
417
}
418
#endif
419
 
420
/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
421
   saturating arithmetic.  */
422
static __inline __m64 __attribute__((__always_inline__))
423
_mm_subs_pi8 (__m64 __m1, __m64 __m2)
424
{
425
  return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2);
426
}
427
 
428
static __inline __m64 __attribute__((__always_inline__))
429
_m_psubsb (__m64 __m1, __m64 __m2)
430
{
431
  return _mm_subs_pi8 (__m1, __m2);
432
}
433
 
434
/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
435
   signed saturating arithmetic.  */
436
static __inline __m64 __attribute__((__always_inline__))
437
_mm_subs_pi16 (__m64 __m1, __m64 __m2)
438
{
439
  return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2);
440
}
441
 
442
static __inline __m64 __attribute__((__always_inline__))
443
_m_psubsw (__m64 __m1, __m64 __m2)
444
{
445
  return _mm_subs_pi16 (__m1, __m2);
446
}
447
 
448
/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using
449
   unsigned saturating arithmetic.  */
450
static __inline __m64 __attribute__((__always_inline__))
451
_mm_subs_pu8 (__m64 __m1, __m64 __m2)
452
{
453
  return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2);
454
}
455
 
456
static __inline __m64 __attribute__((__always_inline__))
457
_m_psubusb (__m64 __m1, __m64 __m2)
458
{
459
  return _mm_subs_pu8 (__m1, __m2);
460
}
461
 
462
/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
463
   unsigned saturating arithmetic.  */
464
static __inline __m64 __attribute__((__always_inline__))
465
_mm_subs_pu16 (__m64 __m1, __m64 __m2)
466
{
467
  return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2);
468
}
469
 
470
static __inline __m64 __attribute__((__always_inline__))
471
_m_psubusw (__m64 __m1, __m64 __m2)
472
{
473
  return _mm_subs_pu16 (__m1, __m2);
474
}
475
 
476
/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
477
   four 32-bit intermediate results, which are then summed by pairs to
478
   produce two 32-bit results.  */
479
static __inline __m64 __attribute__((__always_inline__))
480
_mm_madd_pi16 (__m64 __m1, __m64 __m2)
481
{
482
  return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2);
483
}
484
 
485
static __inline __m64 __attribute__((__always_inline__))
486
_m_pmaddwd (__m64 __m1, __m64 __m2)
487
{
488
  return _mm_madd_pi16 (__m1, __m2);
489
}
490
 
491
/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in
492
   M2 and produce the high 16 bits of the 32-bit results.  */
493
static __inline __m64 __attribute__((__always_inline__))
494
_mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
495
{
496
  return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2);
497
}
498
 
499
static __inline __m64 __attribute__((__always_inline__))
500
_m_pmulhw (__m64 __m1, __m64 __m2)
501
{
502
  return _mm_mulhi_pi16 (__m1, __m2);
503
}
504
 
505
/* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce
506
   the low 16 bits of the results.  */
507
static __inline __m64 __attribute__((__always_inline__))
508
_mm_mullo_pi16 (__m64 __m1, __m64 __m2)
509
{
510
  return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2);
511
}
512
 
513
static __inline __m64 __attribute__((__always_inline__))
514
_m_pmullw (__m64 __m1, __m64 __m2)
515
{
516
  return _mm_mullo_pi16 (__m1, __m2);
517
}
518
 
519
/* Shift four 16-bit values in M left by COUNT.  */
520
static __inline __m64 __attribute__((__always_inline__))
521
_mm_sll_pi16 (__m64 __m, __m64 __count)
522
{
523
  return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (long long)__count);
524
}
525
 
526
static __inline __m64 __attribute__((__always_inline__))
527
_m_psllw (__m64 __m, __m64 __count)
528
{
529
  return _mm_sll_pi16 (__m, __count);
530
}
531
 
532
static __inline __m64 __attribute__((__always_inline__))
533
_mm_slli_pi16 (__m64 __m, int __count)
534
{
535
  return (__m64) __builtin_ia32_psllw ((__v4hi)__m, __count);
536
}
537
 
538
static __inline __m64 __attribute__((__always_inline__))
539
_m_psllwi (__m64 __m, int __count)
540
{
541
  return _mm_slli_pi16 (__m, __count);
542
}
543
 
544
/* Shift two 32-bit values in M left by COUNT.  */
545
static __inline __m64 __attribute__((__always_inline__))
546
_mm_sll_pi32 (__m64 __m, __m64 __count)
547
{
548
  return (__m64) __builtin_ia32_pslld ((__v2si)__m, (long long)__count);
549
}
550
 
551
static __inline __m64 __attribute__((__always_inline__))
552
_m_pslld (__m64 __m, __m64 __count)
553
{
554
  return _mm_sll_pi32 (__m, __count);
555
}
556
 
557
static __inline __m64 __attribute__((__always_inline__))
558
_mm_slli_pi32 (__m64 __m, int __count)
559
{
560
  return (__m64) __builtin_ia32_pslld ((__v2si)__m, __count);
561
}
562
 
563
static __inline __m64 __attribute__((__always_inline__))
564
_m_pslldi (__m64 __m, int __count)
565
{
566
  return _mm_slli_pi32 (__m, __count);
567
}
568
 
569
/* Shift the 64-bit value in M left by COUNT.  */
570
static __inline __m64 __attribute__((__always_inline__))
571
_mm_sll_si64 (__m64 __m, __m64 __count)
572
{
573
  return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count);
574
}
575
 
576
static __inline __m64 __attribute__((__always_inline__))
577
_m_psllq (__m64 __m, __m64 __count)
578
{
579
  return _mm_sll_si64 (__m, __count);
580
}
581
 
582
static __inline __m64 __attribute__((__always_inline__))
583
_mm_slli_si64 (__m64 __m, int __count)
584
{
585
  return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count);
586
}
587
 
588
static __inline __m64 __attribute__((__always_inline__))
589
_m_psllqi (__m64 __m, int __count)
590
{
591
  return _mm_slli_si64 (__m, __count);
592
}
593
 
594
/* Shift four 16-bit values in M right by COUNT; shift in the sign bit.  */
595
static __inline __m64 __attribute__((__always_inline__))
596
_mm_sra_pi16 (__m64 __m, __m64 __count)
597
{
598
  return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (long long)__count);
599
}
600
 
601
static __inline __m64 __attribute__((__always_inline__))
602
_m_psraw (__m64 __m, __m64 __count)
603
{
604
  return _mm_sra_pi16 (__m, __count);
605
}
606
 
607
static __inline __m64 __attribute__((__always_inline__))
608
_mm_srai_pi16 (__m64 __m, int __count)
609
{
610
  return (__m64) __builtin_ia32_psraw ((__v4hi)__m, __count);
611
}
612
 
613
static __inline __m64 __attribute__((__always_inline__))
614
_m_psrawi (__m64 __m, int __count)
615
{
616
  return _mm_srai_pi16 (__m, __count);
617
}
618
 
619
/* Shift two 32-bit values in M right by COUNT; shift in the sign bit.  */
620
static __inline __m64 __attribute__((__always_inline__))
621
_mm_sra_pi32 (__m64 __m, __m64 __count)
622
{
623
  return (__m64) __builtin_ia32_psrad ((__v2si)__m, (long long)__count);
624
}
625
 
626
static __inline __m64 __attribute__((__always_inline__))
627
_m_psrad (__m64 __m, __m64 __count)
628
{
629
  return _mm_sra_pi32 (__m, __count);
630
}
631
 
632
static __inline __m64 __attribute__((__always_inline__))
633
_mm_srai_pi32 (__m64 __m, int __count)
634
{
635
  return (__m64) __builtin_ia32_psrad ((__v2si)__m, __count);
636
}
637
 
638
static __inline __m64 __attribute__((__always_inline__))
639
_m_psradi (__m64 __m, int __count)
640
{
641
  return _mm_srai_pi32 (__m, __count);
642
}
643
 
644
/* Shift four 16-bit values in M right by COUNT; shift in zeros.  */
645
static __inline __m64 __attribute__((__always_inline__))
646
_mm_srl_pi16 (__m64 __m, __m64 __count)
647
{
648
  return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (long long)__count);
649
}
650
 
651
static __inline __m64 __attribute__((__always_inline__))
652
_m_psrlw (__m64 __m, __m64 __count)
653
{
654
  return _mm_srl_pi16 (__m, __count);
655
}
656
 
657
static __inline __m64 __attribute__((__always_inline__))
658
_mm_srli_pi16 (__m64 __m, int __count)
659
{
660
  return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, __count);
661
}
662
 
663
static __inline __m64 __attribute__((__always_inline__))
664
_m_psrlwi (__m64 __m, int __count)
665
{
666
  return _mm_srli_pi16 (__m, __count);
667
}
668
 
669
/* Shift two 32-bit values in M right by COUNT; shift in zeros.  */
670
static __inline __m64 __attribute__((__always_inline__))
671
_mm_srl_pi32 (__m64 __m, __m64 __count)
672
{
673
  return (__m64) __builtin_ia32_psrld ((__v2si)__m, (long long)__count);
674
}
675
 
676
static __inline __m64 __attribute__((__always_inline__))
677
_m_psrld (__m64 __m, __m64 __count)
678
{
679
  return _mm_srl_pi32 (__m, __count);
680
}
681
 
682
static __inline __m64 __attribute__((__always_inline__))
683
_mm_srli_pi32 (__m64 __m, int __count)
684
{
685
  return (__m64) __builtin_ia32_psrld ((__v2si)__m, __count);
686
}
687
 
688
static __inline __m64 __attribute__((__always_inline__))
689
_m_psrldi (__m64 __m, int __count)
690
{
691
  return _mm_srli_pi32 (__m, __count);
692
}
693
 
694
/* Shift the 64-bit value in M left by COUNT; shift in zeros.  */
695
static __inline __m64 __attribute__((__always_inline__))
696
_mm_srl_si64 (__m64 __m, __m64 __count)
697
{
698
  return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count);
699
}
700
 
701
static __inline __m64 __attribute__((__always_inline__))
702
_m_psrlq (__m64 __m, __m64 __count)
703
{
704
  return _mm_srl_si64 (__m, __count);
705
}
706
 
707
static __inline __m64 __attribute__((__always_inline__))
708
_mm_srli_si64 (__m64 __m, int __count)
709
{
710
  return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count);
711
}
712
 
713
static __inline __m64 __attribute__((__always_inline__))
714
_m_psrlqi (__m64 __m, int __count)
715
{
716
  return _mm_srli_si64 (__m, __count);
717
}
718
 
719
/* Bit-wise AND the 64-bit values in M1 and M2.  */
720
static __inline __m64 __attribute__((__always_inline__))
721
_mm_and_si64 (__m64 __m1, __m64 __m2)
722
{
723
  return __builtin_ia32_pand (__m1, __m2);
724
}
725
 
726
static __inline __m64 __attribute__((__always_inline__))
727
_m_pand (__m64 __m1, __m64 __m2)
728
{
729
  return _mm_and_si64 (__m1, __m2);
730
}
731
 
732
/* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
733
   64-bit value in M2.  */
734
static __inline __m64 __attribute__((__always_inline__))
735
_mm_andnot_si64 (__m64 __m1, __m64 __m2)
736
{
737
  return __builtin_ia32_pandn (__m1, __m2);
738
}
739
 
740
static __inline __m64 __attribute__((__always_inline__))
741
_m_pandn (__m64 __m1, __m64 __m2)
742
{
743
  return _mm_andnot_si64 (__m1, __m2);
744
}
745
 
746
/* Bit-wise inclusive OR the 64-bit values in M1 and M2.  */
747
static __inline __m64 __attribute__((__always_inline__))
748
_mm_or_si64 (__m64 __m1, __m64 __m2)
749
{
750
  return __builtin_ia32_por (__m1, __m2);
751
}
752
 
753
static __inline __m64 __attribute__((__always_inline__))
754
_m_por (__m64 __m1, __m64 __m2)
755
{
756
  return _mm_or_si64 (__m1, __m2);
757
}
758
 
759
/* Bit-wise exclusive OR the 64-bit values in M1 and M2.  */
760
static __inline __m64 __attribute__((__always_inline__))
761
_mm_xor_si64 (__m64 __m1, __m64 __m2)
762
{
763
  return __builtin_ia32_pxor (__m1, __m2);
764
}
765
 
766
static __inline __m64 __attribute__((__always_inline__))
767
_m_pxor (__m64 __m1, __m64 __m2)
768
{
769
  return _mm_xor_si64 (__m1, __m2);
770
}
771
 
772
/* Compare eight 8-bit values.  The result of the comparison is 0xFF if the
773
   test is true and zero if false.  */
774
static __inline __m64 __attribute__((__always_inline__))
775
_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
776
{
777
  return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2);
778
}
779
 
780
static __inline __m64 __attribute__((__always_inline__))
781
_m_pcmpeqb (__m64 __m1, __m64 __m2)
782
{
783
  return _mm_cmpeq_pi8 (__m1, __m2);
784
}
785
 
786
static __inline __m64 __attribute__((__always_inline__))
787
_mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
788
{
789
  return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2);
790
}
791
 
792
static __inline __m64 __attribute__((__always_inline__))
793
_m_pcmpgtb (__m64 __m1, __m64 __m2)
794
{
795
  return _mm_cmpgt_pi8 (__m1, __m2);
796
}
797
 
798
/* Compare four 16-bit values.  The result of the comparison is 0xFFFF if
799
   the test is true and zero if false.  */
800
static __inline __m64 __attribute__((__always_inline__))
801
_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
802
{
803
  return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2);
804
}
805
 
806
static __inline __m64 __attribute__((__always_inline__))
807
_m_pcmpeqw (__m64 __m1, __m64 __m2)
808
{
809
  return _mm_cmpeq_pi16 (__m1, __m2);
810
}
811
 
812
static __inline __m64 __attribute__((__always_inline__))
813
_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
814
{
815
  return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2);
816
}
817
 
818
static __inline __m64 __attribute__((__always_inline__))
819
_m_pcmpgtw (__m64 __m1, __m64 __m2)
820
{
821
  return _mm_cmpgt_pi16 (__m1, __m2);
822
}
823
 
824
/* Compare two 32-bit values.  The result of the comparison is 0xFFFFFFFF if
825
   the test is true and zero if false.  */
826
static __inline __m64 __attribute__((__always_inline__))
827
_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
828
{
829
  return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2);
830
}
831
 
832
static __inline __m64 __attribute__((__always_inline__))
833
_m_pcmpeqd (__m64 __m1, __m64 __m2)
834
{
835
  return _mm_cmpeq_pi32 (__m1, __m2);
836
}
837
 
838
static __inline __m64 __attribute__((__always_inline__))
839
_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
840
{
841
  return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2);
842
}
843
 
844
static __inline __m64 __attribute__((__always_inline__))
845
_m_pcmpgtd (__m64 __m1, __m64 __m2)
846
{
847
  return _mm_cmpgt_pi32 (__m1, __m2);
848
}
849
 
850
/* Creates a 64-bit zero.  */
851
static __inline __m64 __attribute__((__always_inline__))
852
_mm_setzero_si64 (void)
853
{
854
  return (__m64)0LL;
855
}
856
 
857
/* Creates a vector of two 32-bit values; I0 is least significant.  */
858
static __inline __m64 __attribute__((__always_inline__))
859
_mm_set_pi32 (int __i1, int __i0)
860
{
861
  return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
862
}
863
 
864
/* Creates a vector of four 16-bit values; W0 is least significant.  */
865
static __inline __m64 __attribute__((__always_inline__))
866
_mm_set_pi16 (short __w3, short __w2, short __w1, short __w0)
867
{
868
  return (__m64) __builtin_ia32_vec_init_v4hi (__w0, __w1, __w2, __w3);
869
}
870
 
871
/* Creates a vector of eight 8-bit values; B0 is least significant.  */
872
static __inline __m64 __attribute__((__always_inline__))
873
_mm_set_pi8 (char __b7, char __b6, char __b5, char __b4,
874
             char __b3, char __b2, char __b1, char __b0)
875
{
876
  return (__m64) __builtin_ia32_vec_init_v8qi (__b0, __b1, __b2, __b3,
877
                                               __b4, __b5, __b6, __b7);
878
}
879
 
880
/* Similar, but with the arguments in reverse order.  */
881
static __inline __m64 __attribute__((__always_inline__))
882
_mm_setr_pi32 (int __i0, int __i1)
883
{
884
  return _mm_set_pi32 (__i1, __i0);
885
}
886
 
887
static __inline __m64 __attribute__((__always_inline__))
888
_mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3)
889
{
890
  return _mm_set_pi16 (__w3, __w2, __w1, __w0);
891
}
892
 
893
static __inline __m64 __attribute__((__always_inline__))
894
_mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3,
895
              char __b4, char __b5, char __b6, char __b7)
896
{
897
  return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
898
}
899
 
900
/* Creates a vector of two 32-bit values, both elements containing I.  */
901
static __inline __m64 __attribute__((__always_inline__))
902
_mm_set1_pi32 (int __i)
903
{
904
  return _mm_set_pi32 (__i, __i);
905
}
906
 
907
/* Creates a vector of four 16-bit values, all elements containing W.  */
908
static __inline __m64 __attribute__((__always_inline__))
909
_mm_set1_pi16 (short __w)
910
{
911
  return _mm_set_pi16 (__w, __w, __w, __w);
912
}
913
 
914
/* Creates a vector of eight 8-bit values, all elements containing B.  */
915
static __inline __m64 __attribute__((__always_inline__))
916
_mm_set1_pi8 (char __b)
917
{
918
  return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b);
919
}
920
 
921
#endif /* __MMX__ */
922
#endif /* _MMINTRIN_H_INCLUDED */

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.