1 |
689 |
jeremybenn |
/* { dg-require-effective-target vect_int } */
|
2 |
|
|
|
3 |
|
|
#include <stdarg.h>
|
4 |
|
|
#include "tree-vect.h"
|
5 |
|
|
|
6 |
|
|
#define N 32

/* Test data for the vectorizer loops below.

   sa and ia are the destination arrays; they have no initializer and are
   written by main1 and main2.

   sb and sc are the unsigned short inputs; both hold the ramp 0..31.
   ib and ic are the unsigned int inputs; both hold the multiples of 3
   from 0 to 45 in their first 16 elements and the ramp 0..15 in the
   remaining 16.  */
unsigned short sa[N];
unsigned short sc[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
                16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
unsigned short sb[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
                16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
unsigned int ia[N];
unsigned int ic[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,
               0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
unsigned int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,
               0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
|
18 |
|
|
|
19 |
|
|
/* Current peeling-for-alignment scheme will consider the 'sa[i+7]'
   access for peeling, and therefore will examine the option of
   using a peeling factor = VF-7%VF.  This will result in a peeling factor 1,
   which will also align the access to 'ia[i+3]', and the loop could be
   vectorized on all targets that support unaligned loads.
   Without cost model on targets that support misaligned stores, no peeling
   will be applied since we want to keep the four loads aligned.  */
|
26 |
|
|
|
27 |
|
|
__attribute__ ((noinline))
|
28 |
|
|
int main1 (int n)
|
29 |
|
|
{
|
30 |
|
|
int i;
|
31 |
|
|
|
32 |
|
|
/* Multiple types with different sizes, used in independent
|
33 |
|
|
copmutations. Vectorizable. */
|
34 |
|
|
for (i = 0; i < n; i++)
|
35 |
|
|
{
|
36 |
|
|
sa[i+7] = sb[i] + sc[i];
|
37 |
|
|
ia[i+3] = ib[i] + ic[i];
|
38 |
|
|
}
|
39 |
|
|
|
40 |
|
|
/* check results: */
|
41 |
|
|
for (i = 0; i < n; i++)
|
42 |
|
|
{
|
43 |
|
|
if (sa[i+7] != sb[i] + sc[i] || ia[i+3] != ib[i] + ic[i])
|
44 |
|
|
abort ();
|
45 |
|
|
}
|
46 |
|
|
|
47 |
|
|
return 0;
|
48 |
|
|
}
|
49 |
|
|
|
50 |
|
|
/* Current peeling-for-alignment scheme will consider the 'ia[i+3]'
   access for peeling, and therefore will examine the option of
   using a peeling factor = VF-3%VF.  This will result in a peeling factor
   1 if VF=4,2.  This will not align the access to 'sa[i+3]', for which we
   need to peel 5,1 iterations for VF=4,2 respectively, so the loop cannot
   be vectorized.  However, 'ia[i+3]' also gets aligned if we peel 5
   iterations, so the loop is vectorizable on all targets that support
   unaligned loads.
   Without cost model on targets that support misaligned stores, no peeling
   will be applied since we want to keep the four loads aligned.  */
|
60 |
|
|
|
61 |
|
|
__attribute__ ((noinline))
|
62 |
|
|
int main2 (int n)
|
63 |
|
|
{
|
64 |
|
|
int i;
|
65 |
|
|
|
66 |
|
|
/* Multiple types with different sizes, used in independent
|
67 |
|
|
copmutations. Vectorizable. */
|
68 |
|
|
for (i = 0; i < n; i++)
|
69 |
|
|
{
|
70 |
|
|
ia[i+3] = ib[i] + ic[i];
|
71 |
|
|
sa[i+3] = sb[i] + sc[i];
|
72 |
|
|
}
|
73 |
|
|
|
74 |
|
|
/* check results: */
|
75 |
|
|
for (i = 0; i < n; i++)
|
76 |
|
|
{
|
77 |
|
|
if (sa[i+3] != sb[i] + sc[i] || ia[i+3] != ib[i] + ic[i])
|
78 |
|
|
abort ();
|
79 |
|
|
}
|
80 |
|
|
|
81 |
|
|
return 0;
|
82 |
|
|
}
|
83 |
|
|
|
84 |
|
|
int main (void)
|
85 |
|
|
{
|
86 |
|
|
check_vect ();
|
87 |
|
|
|
88 |
|
|
main1 (N-7);
|
89 |
|
|
main2 (N-3);
|
90 |
|
|
|
91 |
|
|
return 0;
|
92 |
|
|
}
|
93 |
|
|
|
94 |
|
|
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail { vect_no_align } } } } */
|
95 |
|
|
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" { target { vect_element_align} } } } */
|
96 |
|
|
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail { vect_no_align || vect_element_align } } } } */
|
97 |
|
|
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 8 "vect" { xfail { vect_no_align || vect_element_align } } } } */
|
98 |
|
|
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { target { vect_element_align } } } } */
|
99 |
|
|
/* { dg-final { cleanup-tree-dump "vect" } } */
|
100 |
|
|
|