OpenCores
URL https://opencores.org/ocsvn/scarts/scarts/trunk

Subversion Repositories scarts

[/] [scarts/] [trunk/] [toolchain/] [scarts-gcc/] [gcc-4.1.1/] [gcc/] [config/] [i386/] [mmintrin.h] - Blame information for rev 12

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 12 jlechner
/* Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
2
 
3
   This file is part of GCC.
4
 
5
   GCC is free software; you can redistribute it and/or modify
6
   it under the terms of the GNU General Public License as published by
7
   the Free Software Foundation; either version 2, or (at your option)
8
   any later version.
9
 
10
   GCC is distributed in the hope that it will be useful,
11
   but WITHOUT ANY WARRANTY; without even the implied warranty of
12
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
   GNU General Public License for more details.
14
 
15
   You should have received a copy of the GNU General Public License
16
   along with GCC; see the file COPYING.  If not, write to
17
   the Free Software Foundation, 51 Franklin Street, Fifth Floor,
18
   Boston, MA 02110-1301, USA.  */
19
 
20
/* As a special exception, if you include this header file into source
21
   files compiled by GCC, this header file does not by itself cause
22
   the resulting executable to be covered by the GNU General Public
23
   License.  This exception does not however invalidate any other
24
   reasons why the executable file might be covered by the GNU General
25
   Public License.  */
26
 
27
/* Implemented from the specification included in the Intel C++ Compiler
28
   User Guide and Reference, version 8.0.  */
29
 
30
#ifndef _MMINTRIN_H_INCLUDED
31
#define _MMINTRIN_H_INCLUDED
32
 
33
#ifndef __MMX__
34
# error "MMX instruction set not enabled"
35
#else
36
/* The data type intended for user use: an 8-byte vector, declared here
   with int elements via __vector_size__ (8).  */
37
typedef int __m64 __attribute__ ((__vector_size__ (8)));
38
 
39
/* Internal data types for implementing the intrinsics.  Each is an
   8-byte vector; they differ only in element type (int, short, char),
   which selects the lane width the builtins operate on.  */
40
typedef int __v2si __attribute__ ((__vector_size__ (8)));
41
typedef short __v4hi __attribute__ ((__vector_size__ (8)));
42
typedef char __v8qi __attribute__ ((__vector_size__ (8)));
43
 
44
/* Empty the multimedia (MMX) state by issuing the emms builtin.  */
static __inline void __attribute__ ((__always_inline__))
_mm_empty (void)
{
  __builtin_ia32_emms ();
  return;
}
50
 
51
/* Alternate name for _mm_empty.  */
static __inline void __attribute__ ((__always_inline__))
_m_empty (void)
{
  _mm_empty ();
  return;
}
56
 
57
/* Convert I to a __m64 object.  The integer is zero-extended to 64-bits.  */
58
static __inline __m64  __attribute__((__always_inline__))
59
_mm_cvtsi32_si64 (int __i)
60
{
61
  return (__m64) __builtin_ia32_vec_init_v2si (__i, 0);
62
}
63
 
64
static __inline __m64  __attribute__((__always_inline__))
65
_m_from_int (int __i)
66
{
67
  return _mm_cvtsi32_si64 (__i);
68
}
69
 
70
#ifdef __x86_64__
71
/* Convert I to a __m64 object.  */
72
static __inline __m64  __attribute__((__always_inline__))
73
_mm_cvtsi64x_si64 (long long __i)
74
{
75
  return (__m64) __i;
76
}
77
 
78
/* Convert I to a __m64 object.  */
79
static __inline __m64  __attribute__((__always_inline__))
80
_mm_set_pi64x (long long __i)
81
{
82
  return (__m64) __i;
83
}
84
#endif
85
 
86
/* Convert the lower 32 bits of the __m64 object into an integer.  */
87
static __inline int __attribute__((__always_inline__))
88
_mm_cvtsi64_si32 (__m64 __i)
89
{
90
  return __builtin_ia32_vec_ext_v2si ((__v2si)__i, 0);
91
}
92
 
93
static __inline int __attribute__((__always_inline__))
94
_m_to_int (__m64 __i)
95
{
96
  return _mm_cvtsi64_si32 (__i);
97
}
98
 
99
#ifdef __x86_64__
100
/* Convert the lower 32 bits of the __m64 object into an integer.  */
101
static __inline long long __attribute__((__always_inline__))
102
_mm_cvtsi64_si64x (__m64 __i)
103
{
104
  return (long long)__i;
105
}
106
#endif
107
 
108
/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
109
   the result, and the four 16-bit values from M2 into the upper four 8-bit
110
   values of the result, all with signed saturation.  */
111
static __inline __m64 __attribute__((__always_inline__))
112
_mm_packs_pi16 (__m64 __m1, __m64 __m2)
113
{
114
  return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2);
115
}
116
 
117
static __inline __m64 __attribute__((__always_inline__))
118
_m_packsswb (__m64 __m1, __m64 __m2)
119
{
120
  return _mm_packs_pi16 (__m1, __m2);
121
}
122
 
123
/* Pack the two 32-bit values from M1 in to the lower two 16-bit values of
124
   the result, and the two 32-bit values from M2 into the upper two 16-bit
125
   values of the result, all with signed saturation.  */
126
static __inline __m64 __attribute__((__always_inline__))
127
_mm_packs_pi32 (__m64 __m1, __m64 __m2)
128
{
129
  return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2);
130
}
131
 
132
static __inline __m64 __attribute__((__always_inline__))
133
_m_packssdw (__m64 __m1, __m64 __m2)
134
{
135
  return _mm_packs_pi32 (__m1, __m2);
136
}
137
 
138
/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
139
   the result, and the four 16-bit values from M2 into the upper four 8-bit
140
   values of the result, all with unsigned saturation.  */
141
static __inline __m64 __attribute__((__always_inline__))
142
_mm_packs_pu16 (__m64 __m1, __m64 __m2)
143
{
144
  return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2);
145
}
146
 
147
static __inline __m64 __attribute__((__always_inline__))
148
_m_packuswb (__m64 __m1, __m64 __m2)
149
{
150
  return _mm_packs_pu16 (__m1, __m2);
151
}
152
 
153
/* Interleave the four 8-bit values from the high half of M1 with the four
154
   8-bit values from the high half of M2.  */
155
static __inline __m64 __attribute__((__always_inline__))
156
_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
157
{
158
  return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2);
159
}
160
 
161
static __inline __m64 __attribute__((__always_inline__))
162
_m_punpckhbw (__m64 __m1, __m64 __m2)
163
{
164
  return _mm_unpackhi_pi8 (__m1, __m2);
165
}
166
 
167
/* Interleave the two 16-bit values from the high half of M1 with the two
168
   16-bit values from the high half of M2.  */
169
static __inline __m64 __attribute__((__always_inline__))
170
_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
171
{
172
  return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2);
173
}
174
 
175
static __inline __m64 __attribute__((__always_inline__))
176
_m_punpckhwd (__m64 __m1, __m64 __m2)
177
{
178
  return _mm_unpackhi_pi16 (__m1, __m2);
179
}
180
 
181
/* Interleave the 32-bit value from the high half of M1 with the 32-bit
182
   value from the high half of M2.  */
183
static __inline __m64 __attribute__((__always_inline__))
184
_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
185
{
186
  return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2);
187
}
188
 
189
static __inline __m64 __attribute__((__always_inline__))
190
_m_punpckhdq (__m64 __m1, __m64 __m2)
191
{
192
  return _mm_unpackhi_pi32 (__m1, __m2);
193
}
194
 
195
/* Interleave the four 8-bit values from the low half of M1 with the four
196
   8-bit values from the low half of M2.  */
197
static __inline __m64 __attribute__((__always_inline__))
198
_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
199
{
200
  return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2);
201
}
202
 
203
static __inline __m64 __attribute__((__always_inline__))
204
_m_punpcklbw (__m64 __m1, __m64 __m2)
205
{
206
  return _mm_unpacklo_pi8 (__m1, __m2);
207
}
208
 
209
/* Interleave the two 16-bit values from the low half of M1 with the two
210
   16-bit values from the low half of M2.  */
211
static __inline __m64 __attribute__((__always_inline__))
212
_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
213
{
214
  return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2);
215
}
216
 
217
static __inline __m64 __attribute__((__always_inline__))
218
_m_punpcklwd (__m64 __m1, __m64 __m2)
219
{
220
  return _mm_unpacklo_pi16 (__m1, __m2);
221
}
222
 
223
/* Interleave the 32-bit value from the low half of M1 with the 32-bit
224
   value from the low half of M2.  */
225
static __inline __m64 __attribute__((__always_inline__))
226
_mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
227
{
228
  return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2);
229
}
230
 
231
static __inline __m64 __attribute__((__always_inline__))
232
_m_punpckldq (__m64 __m1, __m64 __m2)
233
{
234
  return _mm_unpacklo_pi32 (__m1, __m2);
235
}
236
 
237
/* Add the 8-bit values in M1 to the 8-bit values in M2.  */
238
static __inline __m64 __attribute__((__always_inline__))
239
_mm_add_pi8 (__m64 __m1, __m64 __m2)
240
{
241
  return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2);
242
}
243
 
244
static __inline __m64 __attribute__((__always_inline__))
245
_m_paddb (__m64 __m1, __m64 __m2)
246
{
247
  return _mm_add_pi8 (__m1, __m2);
248
}
249
 
250
/* Add the 16-bit values in M1 to the 16-bit values in M2.  */
251
static __inline __m64 __attribute__((__always_inline__))
252
_mm_add_pi16 (__m64 __m1, __m64 __m2)
253
{
254
  return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2);
255
}
256
 
257
static __inline __m64 __attribute__((__always_inline__))
258
_m_paddw (__m64 __m1, __m64 __m2)
259
{
260
  return _mm_add_pi16 (__m1, __m2);
261
}
262
 
263
/* Add the 32-bit values in M1 to the 32-bit values in M2.  */
264
static __inline __m64 __attribute__((__always_inline__))
265
_mm_add_pi32 (__m64 __m1, __m64 __m2)
266
{
267
  return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
268
}
269
 
270
static __inline __m64 __attribute__((__always_inline__))
271
_m_paddd (__m64 __m1, __m64 __m2)
272
{
273
  return _mm_add_pi32 (__m1, __m2);
274
}
275
 
276
/* Add the 64-bit values in M1 to the 64-bit values in M2.  */
277
static __inline __m64 __attribute__((__always_inline__))
278
_mm_add_si64 (__m64 __m1, __m64 __m2)
279
{
280
  return (__m64) __builtin_ia32_paddq ((long long)__m1, (long long)__m2);
281
}
282
 
283
/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
284
   saturated arithmetic.  */
285
static __inline __m64 __attribute__((__always_inline__))
286
_mm_adds_pi8 (__m64 __m1, __m64 __m2)
287
{
288
  return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2);
289
}
290
 
291
static __inline __m64 __attribute__((__always_inline__))
292
_m_paddsb (__m64 __m1, __m64 __m2)
293
{
294
  return _mm_adds_pi8 (__m1, __m2);
295
}
296
 
297
/* Add the 16-bit values in M1 to the 16-bit values in M2 using signed
298
   saturated arithmetic.  */
299
static __inline __m64 __attribute__((__always_inline__))
300
_mm_adds_pi16 (__m64 __m1, __m64 __m2)
301
{
302
  return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2);
303
}
304
 
305
static __inline __m64 __attribute__((__always_inline__))
306
_m_paddsw (__m64 __m1, __m64 __m2)
307
{
308
  return _mm_adds_pi16 (__m1, __m2);
309
}
310
 
311
/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
312
   saturated arithmetic.  */
313
static __inline __m64 __attribute__((__always_inline__))
314
_mm_adds_pu8 (__m64 __m1, __m64 __m2)
315
{
316
  return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2);
317
}
318
 
319
static __inline __m64 __attribute__((__always_inline__))
320
_m_paddusb (__m64 __m1, __m64 __m2)
321
{
322
  return _mm_adds_pu8 (__m1, __m2);
323
}
324
 
325
/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
326
   saturated arithmetic.  */
327
static __inline __m64 __attribute__((__always_inline__))
328
_mm_adds_pu16 (__m64 __m1, __m64 __m2)
329
{
330
  return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2);
331
}
332
 
333
static __inline __m64 __attribute__((__always_inline__))
334
_m_paddusw (__m64 __m1, __m64 __m2)
335
{
336
  return _mm_adds_pu16 (__m1, __m2);
337
}
338
 
339
/* Subtract the 8-bit values in M2 from the 8-bit values in M1.  */
340
static __inline __m64 __attribute__((__always_inline__))
341
_mm_sub_pi8 (__m64 __m1, __m64 __m2)
342
{
343
  return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2);
344
}
345
 
346
static __inline __m64 __attribute__((__always_inline__))
347
_m_psubb (__m64 __m1, __m64 __m2)
348
{
349
  return _mm_sub_pi8 (__m1, __m2);
350
}
351
 
352
/* Subtract the 16-bit values in M2 from the 16-bit values in M1.  */
353
static __inline __m64 __attribute__((__always_inline__))
354
_mm_sub_pi16 (__m64 __m1, __m64 __m2)
355
{
356
  return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2);
357
}
358
 
359
static __inline __m64 __attribute__((__always_inline__))
360
_m_psubw (__m64 __m1, __m64 __m2)
361
{
362
  return _mm_sub_pi16 (__m1, __m2);
363
}
364
 
365
/* Subtract the 32-bit values in M2 from the 32-bit values in M1.  */
366
static __inline __m64 __attribute__((__always_inline__))
367
_mm_sub_pi32 (__m64 __m1, __m64 __m2)
368
{
369
  return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
370
}
371
 
372
static __inline __m64 __attribute__((__always_inline__))
373
_m_psubd (__m64 __m1, __m64 __m2)
374
{
375
  return _mm_sub_pi32 (__m1, __m2);
376
}
377
 
378
/* Add the 64-bit values in M1 to the 64-bit values in M2.  */
379
static __inline __m64 __attribute__((__always_inline__))
380
_mm_sub_si64 (__m64 __m1, __m64 __m2)
381
{
382
  return (__m64) __builtin_ia32_psubq ((long long)__m1, (long long)__m2);
383
}
384
 
385
/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
386
   saturating arithmetic.  */
387
static __inline __m64 __attribute__((__always_inline__))
388
_mm_subs_pi8 (__m64 __m1, __m64 __m2)
389
{
390
  return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2);
391
}
392
 
393
static __inline __m64 __attribute__((__always_inline__))
394
_m_psubsb (__m64 __m1, __m64 __m2)
395
{
396
  return _mm_subs_pi8 (__m1, __m2);
397
}
398
 
399
/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
400
   signed saturating arithmetic.  */
401
static __inline __m64 __attribute__((__always_inline__))
402
_mm_subs_pi16 (__m64 __m1, __m64 __m2)
403
{
404
  return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2);
405
}
406
 
407
static __inline __m64 __attribute__((__always_inline__))
408
_m_psubsw (__m64 __m1, __m64 __m2)
409
{
410
  return _mm_subs_pi16 (__m1, __m2);
411
}
412
 
413
/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using
414
   unsigned saturating arithmetic.  */
415
static __inline __m64 __attribute__((__always_inline__))
416
_mm_subs_pu8 (__m64 __m1, __m64 __m2)
417
{
418
  return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2);
419
}
420
 
421
static __inline __m64 __attribute__((__always_inline__))
422
_m_psubusb (__m64 __m1, __m64 __m2)
423
{
424
  return _mm_subs_pu8 (__m1, __m2);
425
}
426
 
427
/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
428
   unsigned saturating arithmetic.  */
429
static __inline __m64 __attribute__((__always_inline__))
430
_mm_subs_pu16 (__m64 __m1, __m64 __m2)
431
{
432
  return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2);
433
}
434
 
435
static __inline __m64 __attribute__((__always_inline__))
436
_m_psubusw (__m64 __m1, __m64 __m2)
437
{
438
  return _mm_subs_pu16 (__m1, __m2);
439
}
440
 
441
/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
442
   four 32-bit intermediate results, which are then summed by pairs to
443
   produce two 32-bit results.  */
444
static __inline __m64 __attribute__((__always_inline__))
445
_mm_madd_pi16 (__m64 __m1, __m64 __m2)
446
{
447
  return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2);
448
}
449
 
450
static __inline __m64 __attribute__((__always_inline__))
451
_m_pmaddwd (__m64 __m1, __m64 __m2)
452
{
453
  return _mm_madd_pi16 (__m1, __m2);
454
}
455
 
456
/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in
457
   M2 and produce the high 16 bits of the 32-bit results.  */
458
static __inline __m64 __attribute__((__always_inline__))
459
_mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
460
{
461
  return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2);
462
}
463
 
464
static __inline __m64 __attribute__((__always_inline__))
465
_m_pmulhw (__m64 __m1, __m64 __m2)
466
{
467
  return _mm_mulhi_pi16 (__m1, __m2);
468
}
469
 
470
/* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce
471
   the low 16 bits of the results.  */
472
static __inline __m64 __attribute__((__always_inline__))
473
_mm_mullo_pi16 (__m64 __m1, __m64 __m2)
474
{
475
  return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2);
476
}
477
 
478
static __inline __m64 __attribute__((__always_inline__))
479
_m_pmullw (__m64 __m1, __m64 __m2)
480
{
481
  return _mm_mullo_pi16 (__m1, __m2);
482
}
483
 
484
/* Shift four 16-bit values in M left by COUNT.  */
485
static __inline __m64 __attribute__((__always_inline__))
486
_mm_sll_pi16 (__m64 __m, __m64 __count)
487
{
488
  return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (long long)__count);
489
}
490
 
491
static __inline __m64 __attribute__((__always_inline__))
492
_m_psllw (__m64 __m, __m64 __count)
493
{
494
  return _mm_sll_pi16 (__m, __count);
495
}
496
 
497
static __inline __m64 __attribute__((__always_inline__))
498
_mm_slli_pi16 (__m64 __m, int __count)
499
{
500
  return (__m64) __builtin_ia32_psllw ((__v4hi)__m, __count);
501
}
502
 
503
static __inline __m64 __attribute__((__always_inline__))
504
_m_psllwi (__m64 __m, int __count)
505
{
506
  return _mm_slli_pi16 (__m, __count);
507
}
508
 
509
/* Shift two 32-bit values in M left by COUNT.  */
510
static __inline __m64 __attribute__((__always_inline__))
511
_mm_sll_pi32 (__m64 __m, __m64 __count)
512
{
513
  return (__m64) __builtin_ia32_pslld ((__v2si)__m, (long long)__count);
514
}
515
 
516
static __inline __m64 __attribute__((__always_inline__))
517
_m_pslld (__m64 __m, __m64 __count)
518
{
519
  return _mm_sll_pi32 (__m, __count);
520
}
521
 
522
static __inline __m64 __attribute__((__always_inline__))
523
_mm_slli_pi32 (__m64 __m, int __count)
524
{
525
  return (__m64) __builtin_ia32_pslld ((__v2si)__m, __count);
526
}
527
 
528
static __inline __m64 __attribute__((__always_inline__))
529
_m_pslldi (__m64 __m, int __count)
530
{
531
  return _mm_slli_pi32 (__m, __count);
532
}
533
 
534
/* Shift the 64-bit value in M left by COUNT.  */
535
static __inline __m64 __attribute__((__always_inline__))
536
_mm_sll_si64 (__m64 __m, __m64 __count)
537
{
538
  return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count);
539
}
540
 
541
static __inline __m64 __attribute__((__always_inline__))
542
_m_psllq (__m64 __m, __m64 __count)
543
{
544
  return _mm_sll_si64 (__m, __count);
545
}
546
 
547
static __inline __m64 __attribute__((__always_inline__))
548
_mm_slli_si64 (__m64 __m, int __count)
549
{
550
  return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count);
551
}
552
 
553
static __inline __m64 __attribute__((__always_inline__))
554
_m_psllqi (__m64 __m, int __count)
555
{
556
  return _mm_slli_si64 (__m, __count);
557
}
558
 
559
/* Shift four 16-bit values in M right by COUNT; shift in the sign bit.  */
560
static __inline __m64 __attribute__((__always_inline__))
561
_mm_sra_pi16 (__m64 __m, __m64 __count)
562
{
563
  return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (long long)__count);
564
}
565
 
566
static __inline __m64 __attribute__((__always_inline__))
567
_m_psraw (__m64 __m, __m64 __count)
568
{
569
  return _mm_sra_pi16 (__m, __count);
570
}
571
 
572
static __inline __m64 __attribute__((__always_inline__))
573
_mm_srai_pi16 (__m64 __m, int __count)
574
{
575
  return (__m64) __builtin_ia32_psraw ((__v4hi)__m, __count);
576
}
577
 
578
static __inline __m64 __attribute__((__always_inline__))
579
_m_psrawi (__m64 __m, int __count)
580
{
581
  return _mm_srai_pi16 (__m, __count);
582
}
583
 
584
/* Shift two 32-bit values in M right by COUNT; shift in the sign bit.  */
585
static __inline __m64 __attribute__((__always_inline__))
586
_mm_sra_pi32 (__m64 __m, __m64 __count)
587
{
588
  return (__m64) __builtin_ia32_psrad ((__v2si)__m, (long long)__count);
589
}
590
 
591
static __inline __m64 __attribute__((__always_inline__))
592
_m_psrad (__m64 __m, __m64 __count)
593
{
594
  return _mm_sra_pi32 (__m, __count);
595
}
596
 
597
static __inline __m64 __attribute__((__always_inline__))
598
_mm_srai_pi32 (__m64 __m, int __count)
599
{
600
  return (__m64) __builtin_ia32_psrad ((__v2si)__m, __count);
601
}
602
 
603
static __inline __m64 __attribute__((__always_inline__))
604
_m_psradi (__m64 __m, int __count)
605
{
606
  return _mm_srai_pi32 (__m, __count);
607
}
608
 
609
/* Shift four 16-bit values in M right by COUNT; shift in zeros.  */
610
static __inline __m64 __attribute__((__always_inline__))
611
_mm_srl_pi16 (__m64 __m, __m64 __count)
612
{
613
  return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (long long)__count);
614
}
615
 
616
static __inline __m64 __attribute__((__always_inline__))
617
_m_psrlw (__m64 __m, __m64 __count)
618
{
619
  return _mm_srl_pi16 (__m, __count);
620
}
621
 
622
static __inline __m64 __attribute__((__always_inline__))
623
_mm_srli_pi16 (__m64 __m, int __count)
624
{
625
  return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, __count);
626
}
627
 
628
static __inline __m64 __attribute__((__always_inline__))
629
_m_psrlwi (__m64 __m, int __count)
630
{
631
  return _mm_srli_pi16 (__m, __count);
632
}
633
 
634
/* Shift two 32-bit values in M right by COUNT; shift in zeros.  */
635
static __inline __m64 __attribute__((__always_inline__))
636
_mm_srl_pi32 (__m64 __m, __m64 __count)
637
{
638
  return (__m64) __builtin_ia32_psrld ((__v2si)__m, (long long)__count);
639
}
640
 
641
static __inline __m64 __attribute__((__always_inline__))
642
_m_psrld (__m64 __m, __m64 __count)
643
{
644
  return _mm_srl_pi32 (__m, __count);
645
}
646
 
647
static __inline __m64 __attribute__((__always_inline__))
648
_mm_srli_pi32 (__m64 __m, int __count)
649
{
650
  return (__m64) __builtin_ia32_psrld ((__v2si)__m, __count);
651
}
652
 
653
static __inline __m64 __attribute__((__always_inline__))
654
_m_psrldi (__m64 __m, int __count)
655
{
656
  return _mm_srli_pi32 (__m, __count);
657
}
658
 
659
/* Shift the 64-bit value in M left by COUNT; shift in zeros.  */
660
static __inline __m64 __attribute__((__always_inline__))
661
_mm_srl_si64 (__m64 __m, __m64 __count)
662
{
663
  return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count);
664
}
665
 
666
static __inline __m64 __attribute__((__always_inline__))
667
_m_psrlq (__m64 __m, __m64 __count)
668
{
669
  return _mm_srl_si64 (__m, __count);
670
}
671
 
672
static __inline __m64 __attribute__((__always_inline__))
673
_mm_srli_si64 (__m64 __m, int __count)
674
{
675
  return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count);
676
}
677
 
678
static __inline __m64 __attribute__((__always_inline__))
679
_m_psrlqi (__m64 __m, int __count)
680
{
681
  return _mm_srli_si64 (__m, __count);
682
}
683
 
684
/* Bit-wise AND the 64-bit values in M1 and M2.  */
685
static __inline __m64 __attribute__((__always_inline__))
686
_mm_and_si64 (__m64 __m1, __m64 __m2)
687
{
688
  return __builtin_ia32_pand (__m1, __m2);
689
}
690
 
691
static __inline __m64 __attribute__((__always_inline__))
692
_m_pand (__m64 __m1, __m64 __m2)
693
{
694
  return _mm_and_si64 (__m1, __m2);
695
}
696
 
697
/* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
698
   64-bit value in M2.  */
699
static __inline __m64 __attribute__((__always_inline__))
700
_mm_andnot_si64 (__m64 __m1, __m64 __m2)
701
{
702
  return __builtin_ia32_pandn (__m1, __m2);
703
}
704
 
705
static __inline __m64 __attribute__((__always_inline__))
706
_m_pandn (__m64 __m1, __m64 __m2)
707
{
708
  return _mm_andnot_si64 (__m1, __m2);
709
}
710
 
711
/* Bit-wise inclusive OR the 64-bit values in M1 and M2.  */
712
static __inline __m64 __attribute__((__always_inline__))
713
_mm_or_si64 (__m64 __m1, __m64 __m2)
714
{
715
  return __builtin_ia32_por (__m1, __m2);
716
}
717
 
718
static __inline __m64 __attribute__((__always_inline__))
719
_m_por (__m64 __m1, __m64 __m2)
720
{
721
  return _mm_or_si64 (__m1, __m2);
722
}
723
 
724
/* Bit-wise exclusive OR the 64-bit values in M1 and M2.  */
725
static __inline __m64 __attribute__((__always_inline__))
726
_mm_xor_si64 (__m64 __m1, __m64 __m2)
727
{
728
  return __builtin_ia32_pxor (__m1, __m2);
729
}
730
 
731
static __inline __m64 __attribute__((__always_inline__))
732
_m_pxor (__m64 __m1, __m64 __m2)
733
{
734
  return _mm_xor_si64 (__m1, __m2);
735
}
736
 
737
/* Compare eight 8-bit values.  The result of the comparison is 0xFF if the
738
   test is true and zero if false.  */
739
static __inline __m64 __attribute__((__always_inline__))
740
_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
741
{
742
  return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2);
743
}
744
 
745
static __inline __m64 __attribute__((__always_inline__))
746
_m_pcmpeqb (__m64 __m1, __m64 __m2)
747
{
748
  return _mm_cmpeq_pi8 (__m1, __m2);
749
}
750
 
751
static __inline __m64 __attribute__((__always_inline__))
752
_mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
753
{
754
  return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2);
755
}
756
 
757
static __inline __m64 __attribute__((__always_inline__))
758
_m_pcmpgtb (__m64 __m1, __m64 __m2)
759
{
760
  return _mm_cmpgt_pi8 (__m1, __m2);
761
}
762
 
763
/* Compare four 16-bit values.  The result of the comparison is 0xFFFF if
764
   the test is true and zero if false.  */
765
static __inline __m64 __attribute__((__always_inline__))
766
_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
767
{
768
  return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2);
769
}
770
 
771
static __inline __m64 __attribute__((__always_inline__))
772
_m_pcmpeqw (__m64 __m1, __m64 __m2)
773
{
774
  return _mm_cmpeq_pi16 (__m1, __m2);
775
}
776
 
777
static __inline __m64 __attribute__((__always_inline__))
778
_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
779
{
780
  return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2);
781
}
782
 
783
static __inline __m64 __attribute__((__always_inline__))
784
_m_pcmpgtw (__m64 __m1, __m64 __m2)
785
{
786
  return _mm_cmpgt_pi16 (__m1, __m2);
787
}
788
 
789
/* Compare two 32-bit values.  The result of the comparison is 0xFFFFFFFF if
790
   the test is true and zero if false.  */
791
static __inline __m64 __attribute__((__always_inline__))
792
_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
793
{
794
  return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2);
795
}
796
 
797
static __inline __m64 __attribute__((__always_inline__))
798
_m_pcmpeqd (__m64 __m1, __m64 __m2)
799
{
800
  return _mm_cmpeq_pi32 (__m1, __m2);
801
}
802
 
803
static __inline __m64 __attribute__((__always_inline__))
804
_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
805
{
806
  return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2);
807
}
808
 
809
static __inline __m64 __attribute__((__always_inline__))
810
_m_pcmpgtd (__m64 __m1, __m64 __m2)
811
{
812
  return _mm_cmpgt_pi32 (__m1, __m2);
813
}
814
 
815
/* Creates a 64-bit zero.  */
816
static __inline __m64 __attribute__((__always_inline__))
817
_mm_setzero_si64 (void)
818
{
819
  return (__m64)0LL;
820
}
821
 
822
/* Creates a vector of two 32-bit values; I0 is least significant.  */
823
static __inline __m64 __attribute__((__always_inline__))
824
_mm_set_pi32 (int __i1, int __i0)
825
{
826
  return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
827
}
828
 
829
/* Creates a vector of four 16-bit values; W0 is least significant.  */
830
static __inline __m64 __attribute__((__always_inline__))
831
_mm_set_pi16 (short __w3, short __w2, short __w1, short __w0)
832
{
833
  return (__m64) __builtin_ia32_vec_init_v4hi (__w0, __w1, __w2, __w3);
834
}
835
 
836
/* Creates a vector of eight 8-bit values; B0 is least significant.  */
837
static __inline __m64 __attribute__((__always_inline__))
838
_mm_set_pi8 (char __b7, char __b6, char __b5, char __b4,
839
             char __b3, char __b2, char __b1, char __b0)
840
{
841
  return (__m64) __builtin_ia32_vec_init_v8qi (__b0, __b1, __b2, __b3,
842
                                               __b4, __b5, __b6, __b7);
843
}
844
 
845
/* Similar, but with the arguments in reverse order.  */
846
static __inline __m64 __attribute__((__always_inline__))
847
_mm_setr_pi32 (int __i0, int __i1)
848
{
849
  return _mm_set_pi32 (__i1, __i0);
850
}
851
 
852
static __inline __m64 __attribute__((__always_inline__))
853
_mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3)
854
{
855
  return _mm_set_pi16 (__w3, __w2, __w1, __w0);
856
}
857
 
858
static __inline __m64 __attribute__((__always_inline__))
859
_mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3,
860
              char __b4, char __b5, char __b6, char __b7)
861
{
862
  return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
863
}
864
 
865
/* Creates a vector of two 32-bit values, both elements containing I.  */
866
static __inline __m64 __attribute__((__always_inline__))
867
_mm_set1_pi32 (int __i)
868
{
869
  return _mm_set_pi32 (__i, __i);
870
}
871
 
872
/* Creates a vector of four 16-bit values, all elements containing W.  */
873
static __inline __m64 __attribute__((__always_inline__))
874
_mm_set1_pi16 (short __w)
875
{
876
  return _mm_set_pi16 (__w, __w, __w, __w);
877
}
878
 
879
/* Creates a vector of eight 8-bit values, all elements containing B.  */
880
static __inline __m64 __attribute__((__always_inline__))
881
_mm_set1_pi8 (char __b)
882
{
883
  return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b);
884
}
885
 
886
#endif /* __MMX__ */
887
#endif /* _MMINTRIN_H_INCLUDED */

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.