OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [gcc/] [tree-vect-generic.c] - Blame information for rev 728

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 684 jeremybenn
/* Lower vector operations to scalar operations.
2
   Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010
3
   Free Software Foundation, Inc.
4
 
5
This file is part of GCC.
6
 
7
GCC is free software; you can redistribute it and/or modify it
8
under the terms of the GNU General Public License as published by the
9
Free Software Foundation; either version 3, or (at your option) any
10
later version.
11
 
12
GCC is distributed in the hope that it will be useful, but WITHOUT
13
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15
for more details.
16
 
17
You should have received a copy of the GNU General Public License
18
along with GCC; see the file COPYING3.  If not see
19
<http://www.gnu.org/licenses/>.  */
20
 
21
#include "config.h"
22
#include "system.h"
23
#include "coretypes.h"
24
#include "tree.h"
25
#include "tm.h"
26
#include "langhooks.h"
27
#include "tree-flow.h"
28
#include "gimple.h"
29
#include "tree-iterator.h"
30
#include "tree-pass.h"
31
#include "flags.h"
32
#include "ggc.h"
33
#include "diagnostic.h"
34
 
35
/* Need to include rtl.h, expr.h, etc. for optabs.  */
36
#include "expr.h"
37
#include "optabs.h"
38
 
39
 
40
static void expand_vector_operations_1 (gimple_stmt_iterator *);
41
 
42
 
43
/* Build a constant of type TYPE, made of VALUE's bits replicated
44
   every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */
45
static tree
46
build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
47
{
48
  int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
49
  int n = HOST_BITS_PER_WIDE_INT / width;
50
  unsigned HOST_WIDE_INT low, high, mask;
51
  tree ret;
52
 
53
  gcc_assert (n);
54
 
55
  if (width == HOST_BITS_PER_WIDE_INT)
56
    low = value;
57
  else
58
    {
59
      mask = ((HOST_WIDE_INT)1 << width) - 1;
60
      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
61
    }
62
 
63
  if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
64
    low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
65
  else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
66
    high = 0;
67
  else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT)
68
    high = low;
69
  else
70
    gcc_unreachable ();
71
 
72
  ret = build_int_cst_wide (type, low, high);
73
  return ret;
74
}
75
 
76
static GTY(()) tree vector_inner_type;
77
static GTY(()) tree vector_last_type;
78
static GTY(()) int vector_last_nunits;
79
 
80
/* Return a suitable vector types made of SUBPARTS units each of mode
81
   "word_mode" (the global variable).  */
82
static tree
83
build_word_mode_vector_type (int nunits)
84
{
85
  if (!vector_inner_type)
86
    vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
87
  else if (vector_last_nunits == nunits)
88
    {
89
      gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
90
      return vector_last_type;
91
    }
92
 
93
  /* We build a new type, but we canonicalize it nevertheless,
94
     because it still saves some memory.  */
95
  vector_last_nunits = nunits;
96
  vector_last_type = type_hash_canon (nunits,
97
                                      build_vector_type (vector_inner_type,
98
                                                         nunits));
99
  return vector_last_type;
100
}
101
 
102
typedef tree (*elem_op_func) (gimple_stmt_iterator *,
103
                              tree, tree, tree, tree, tree, enum tree_code);
104
 
105
static inline tree
106
tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
107
                  tree t, tree bitsize, tree bitpos)
108
{
109
  if (bitpos)
110
    return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
111
  else
112
    return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
113
}
114
 
115
static tree
116
do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
117
         tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
118
         enum tree_code code)
119
{
120
  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
121
  return gimplify_build1 (gsi, code, inner_type, a);
122
}
123
 
124
static tree
125
do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
126
          tree bitpos, tree bitsize, enum tree_code code)
127
{
128
  if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
129
    a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
130
  if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
131
    b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
132
  return gimplify_build2 (gsi, code, inner_type, a, b);
133
}
134
 
135
/* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0
136
 
137
   INNER_TYPE is the type of A and B elements
138
 
139
   returned expression is of signed integer type with the
140
   size equal to the size of INNER_TYPE.  */
141
static tree
142
do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
143
          tree bitpos, tree bitsize, enum tree_code code)
144
{
145
  tree comp_type;
146
 
147
  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
148
  b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
149
 
150
  comp_type = build_nonstandard_integer_type
151
                      (GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0);
152
 
153
  return gimplify_build3 (gsi, COND_EXPR, comp_type,
154
                          fold_build2 (code, boolean_type_node, a, b),
155
                          build_int_cst (comp_type, -1),
156
                          build_int_cst (comp_type, 0));
157
}
158
 
159
/* Expand vector addition to scalars.  This does bit twiddling
160
   in order to increase parallelism:
161
 
162
   a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
163
           (a ^ b) & 0x80808080
164
 
165
   a - b =  (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
166
            (a ^ ~b) & 0x80808080
167
 
168
   -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
169
 
170
   This optimization should be done only if 4 vector items or more
171
   fit into a word.  */
172
static tree
173
do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
174
               tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
175
               enum tree_code code)
176
{
177
  tree inner_type = TREE_TYPE (TREE_TYPE (a));
178
  unsigned HOST_WIDE_INT max;
179
  tree low_bits, high_bits, a_low, b_low, result_low, signs;
180
 
181
  max = GET_MODE_MASK (TYPE_MODE (inner_type));
182
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
183
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
184
 
185
  a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
186
  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
187
 
188
  signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
189
  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
190
  if (code == PLUS_EXPR)
191
    a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
192
  else
193
    {
194
      a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
195
      signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
196
    }
197
 
198
  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
199
  result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
200
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
201
}
202
 
203
static tree
204
do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
205
           tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
206
           tree bitsize ATTRIBUTE_UNUSED,
207
           enum tree_code code ATTRIBUTE_UNUSED)
208
{
209
  tree inner_type = TREE_TYPE (TREE_TYPE (b));
210
  HOST_WIDE_INT max;
211
  tree low_bits, high_bits, b_low, result_low, signs;
212
 
213
  max = GET_MODE_MASK (TYPE_MODE (inner_type));
214
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
215
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
216
 
217
  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
218
 
219
  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
220
  signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
221
  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
222
  result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
223
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
224
}
225
 
226
/* Expand a vector operation to scalars, by using many operations
227
   whose type is the vector type's inner type.  */
228
static tree
229
expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
230
                         tree type, tree inner_type,
231
                         tree a, tree b, enum tree_code code)
232
{
233
  VEC(constructor_elt,gc) *v;
234
  tree part_width = TYPE_SIZE (inner_type);
235
  tree index = bitsize_int (0);
236
  int nunits = TYPE_VECTOR_SUBPARTS (type);
237
  int delta = tree_low_cst (part_width, 1)
238
              / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
239
  int i;
240
  location_t loc = gimple_location (gsi_stmt (*gsi));
241
 
242
  if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
243
    warning_at (loc, OPT_Wvector_operation_performance,
244
                "vector operation will be expanded piecewise");
245
  else
246
    warning_at (loc, OPT_Wvector_operation_performance,
247
                "vector operation will be expanded in parallel");
248
 
249
  v = VEC_alloc(constructor_elt, gc, (nunits + delta - 1) / delta);
250
  for (i = 0; i < nunits;
251
       i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
252
    {
253
      tree result = f (gsi, inner_type, a, b, index, part_width, code);
254
      constructor_elt *ce = VEC_quick_push (constructor_elt, v, NULL);
255
      ce->index = NULL_TREE;
256
      ce->value = result;
257
    }
258
 
259
  return build_constructor (type, v);
260
}
261
 
262
/* Expand a vector operation to scalars with the freedom to use
263
   a scalar integer type, or to use a different size for the items
264
   in the vector type.  */
265
static tree
266
expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
267
                        tree a, tree b,
268
                        enum tree_code code)
269
{
270
  tree result, compute_type;
271
  enum machine_mode mode;
272
  int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;
273
  location_t loc = gimple_location (gsi_stmt (*gsi));
274
 
275
  /* We have three strategies.  If the type is already correct, just do
276
     the operation an element at a time.  Else, if the vector is wider than
277
     one word, do it a word at a time; finally, if the vector is smaller
278
     than one word, do it as a scalar.  */
279
  if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
280
     return expand_vector_piecewise (gsi, f,
281
                                     type, TREE_TYPE (type),
282
                                     a, b, code);
283
  else if (n_words > 1)
284
    {
285
      tree word_type = build_word_mode_vector_type (n_words);
286
      result = expand_vector_piecewise (gsi, f,
287
                                        word_type, TREE_TYPE (word_type),
288
                                        a, b, code);
289
      result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
290
                                         GSI_SAME_STMT);
291
    }
292
  else
293
    {
294
      /* Use a single scalar operation with a mode no wider than word_mode.  */
295
      mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
296
      compute_type = lang_hooks.types.type_for_mode (mode, 1);
297
      result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
298
      warning_at (loc, OPT_Wvector_operation_performance,
299
                  "vector operation will be expanded with a "
300
                  "single scalar operation");
301
    }
302
 
303
  return result;
304
}
305
 
306
/* Expand a vector operation to scalars; for integer types we can use
307
   special bit twiddling tricks to do the sums a word at a time, using
308
   function F_PARALLEL instead of F.  These tricks are done only if
309
   they can process at least four items, that is, only if the vector
310
   holds at least four items and if a word can hold four items.  */
311
static tree
312
expand_vector_addition (gimple_stmt_iterator *gsi,
313
                        elem_op_func f, elem_op_func f_parallel,
314
                        tree type, tree a, tree b, enum tree_code code)
315
{
316
  int parts_per_word = UNITS_PER_WORD
317
                       / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);
318
 
319
  if (INTEGRAL_TYPE_P (TREE_TYPE (type))
320
      && parts_per_word >= 4
321
      && TYPE_VECTOR_SUBPARTS (type) >= 4)
322
    return expand_vector_parallel (gsi, f_parallel,
323
                                   type, a, b, code);
324
  else
325
    return expand_vector_piecewise (gsi, f,
326
                                    type, TREE_TYPE (type),
327
                                    a, b, code);
328
}
329
 
330
/* Check if vector VEC consists of all the equal elements and
331
   that the number of elements corresponds to the type of VEC.
332
   The function returns first element of the vector
333
   or NULL_TREE if the vector is not uniform.  */
334
static tree
335
uniform_vector_p (tree vec)
336
{
337
  tree first, t, els;
338
  unsigned i;
339
 
340
  if (vec == NULL_TREE)
341
    return NULL_TREE;
342
 
343
  if (TREE_CODE (vec) == VECTOR_CST)
344
    {
345
      els = TREE_VECTOR_CST_ELTS (vec);
346
      first = TREE_VALUE (els);
347
      els = TREE_CHAIN (els);
348
 
349
      for (t = els; t; t = TREE_CHAIN (t))
350
        if (!operand_equal_p (first, TREE_VALUE (t), 0))
351
          return NULL_TREE;
352
 
353
      return first;
354
    }
355
 
356
  else if (TREE_CODE (vec) == CONSTRUCTOR)
357
    {
358
      first = error_mark_node;
359
 
360
      FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (vec), i, t)
361
        {
362
          if (i == 0)
363
            {
364
              first = t;
365
              continue;
366
            }
367
          if (!operand_equal_p (first, t, 0))
368
            return NULL_TREE;
369
        }
370
      if (i != TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec)))
371
        return NULL_TREE;
372
 
373
      return first;
374
    }
375
 
376
  return NULL_TREE;
377
}
378
 
379
/* Try to expand vector comparison expression OP0 CODE OP1 by
380
   querying optab if the following expression:
381
        VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
382
   can be expanded.  */
383
static tree
384
expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
385
                          tree op1, enum tree_code code)
386
{
387
  tree t;
388
  if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
389
    t = expand_vector_piecewise (gsi, do_compare, type,
390
                                 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
391
  else
392
    t = NULL_TREE;
393
 
394
  return t;
395
}
396
 
397
static tree
398
expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
399
                         gimple assign, enum tree_code code)
400
{
401
  enum machine_mode compute_mode = TYPE_MODE (compute_type);
402
 
403
  /* If the compute mode is not a vector mode (hence we are not decomposing
404
     a BLKmode vector to smaller, hardware-supported vectors), we may want
405
     to expand the operations in parallel.  */
406
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
407
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
408
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
409
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
410
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
411
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
412
    switch (code)
413
      {
414
      case PLUS_EXPR:
415
      case MINUS_EXPR:
416
        if (!TYPE_OVERFLOW_TRAPS (type))
417
          return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
418
                                         gimple_assign_rhs1 (assign),
419
                                         gimple_assign_rhs2 (assign), code);
420
        break;
421
 
422
      case NEGATE_EXPR:
423
        if (!TYPE_OVERFLOW_TRAPS (type))
424
          return expand_vector_addition (gsi, do_unop, do_negate, type,
425
                                         gimple_assign_rhs1 (assign),
426
                                         NULL_TREE, code);
427
        break;
428
 
429
      case BIT_AND_EXPR:
430
      case BIT_IOR_EXPR:
431
      case BIT_XOR_EXPR:
432
        return expand_vector_parallel (gsi, do_binop, type,
433
                                       gimple_assign_rhs1 (assign),
434
                                       gimple_assign_rhs2 (assign), code);
435
 
436
      case BIT_NOT_EXPR:
437
        return expand_vector_parallel (gsi, do_unop, type,
438
                                       gimple_assign_rhs1 (assign),
439
                                       NULL_TREE, code);
440
      case EQ_EXPR:
441
      case NE_EXPR:
442
      case GT_EXPR:
443
      case LT_EXPR:
444
      case GE_EXPR:
445
      case LE_EXPR:
446
      case UNEQ_EXPR:
447
      case UNGT_EXPR:
448
      case UNLT_EXPR:
449
      case UNGE_EXPR:
450
      case UNLE_EXPR:
451
      case LTGT_EXPR:
452
      case ORDERED_EXPR:
453
      case UNORDERED_EXPR:
454
        {
455
          tree rhs1 = gimple_assign_rhs1 (assign);
456
          tree rhs2 = gimple_assign_rhs2 (assign);
457
 
458
          return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
459
        }
460
      default:
461
        break;
462
      }
463
 
464
  if (TREE_CODE_CLASS (code) == tcc_unary)
465
    return expand_vector_piecewise (gsi, do_unop, type, compute_type,
466
                                    gimple_assign_rhs1 (assign),
467
                                    NULL_TREE, code);
468
  else
469
    return expand_vector_piecewise (gsi, do_binop, type, compute_type,
470
                                    gimple_assign_rhs1 (assign),
471
                                    gimple_assign_rhs2 (assign), code);
472
}
473
 
474
/* Return a type for the widest vector mode whose components are of mode
475
   INNER_MODE, or NULL_TREE if none is found.
476
   SATP is true for saturating fixed-point types.  */
477
 
478
static tree
479
type_for_widest_vector_mode (enum machine_mode inner_mode, optab op, int satp)
480
{
481
  enum machine_mode best_mode = VOIDmode, mode;
482
  int best_nunits = 0;
483
 
484
  if (SCALAR_FLOAT_MODE_P (inner_mode))
485
    mode = MIN_MODE_VECTOR_FLOAT;
486
  else if (SCALAR_FRACT_MODE_P (inner_mode))
487
    mode = MIN_MODE_VECTOR_FRACT;
488
  else if (SCALAR_UFRACT_MODE_P (inner_mode))
489
    mode = MIN_MODE_VECTOR_UFRACT;
490
  else if (SCALAR_ACCUM_MODE_P (inner_mode))
491
    mode = MIN_MODE_VECTOR_ACCUM;
492
  else if (SCALAR_UACCUM_MODE_P (inner_mode))
493
    mode = MIN_MODE_VECTOR_UACCUM;
494
  else
495
    mode = MIN_MODE_VECTOR_INT;
496
 
497
  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
498
    if (GET_MODE_INNER (mode) == inner_mode
499
        && GET_MODE_NUNITS (mode) > best_nunits
500
        && optab_handler (op, mode) != CODE_FOR_nothing)
501
      best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
502
 
503
  if (best_mode == VOIDmode)
504
    return NULL_TREE;
505
  else
506
    {
507
      /* For fixed-point modes, we need to pass satp as the 2nd parameter.  */
508
      if (ALL_FIXED_POINT_MODE_P (best_mode))
509
        return lang_hooks.types.type_for_mode (best_mode, satp);
510
 
511
      return lang_hooks.types.type_for_mode (best_mode, 1);
512
    }
513
}
514
 
515
 
516
/* Build a reference to the element of the vector VECT.  Function
517
   returns either the element itself, either BIT_FIELD_REF, or an
518
   ARRAY_REF expression.
519
 
520
   GSI is requred to insert temporary variables while building a
521
   refernece to the element of the vector VECT.
522
 
523
   PTMPVEC is a pointer to the temporary variable for caching
524
   purposes.  In case when PTMPVEC is NULL new temporary variable
525
   will be created.  */
526
static tree
527
vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
528
{
529
  tree vect_type, vect_elt_type;
530
  gimple asgn;
531
  tree tmpvec;
532
  tree arraytype;
533
  bool need_asgn = true;
534
  unsigned int elements;
535
 
536
  vect_type = TREE_TYPE (vect);
537
  vect_elt_type = TREE_TYPE (vect_type);
538
  elements = TYPE_VECTOR_SUBPARTS (vect_type);
539
 
540
  if (TREE_CODE (idx) == INTEGER_CST)
541
    {
542
      unsigned HOST_WIDE_INT index;
543
 
544
      /* Given that we're about to compute a binary modulus,
545
         we don't care about the high bits of the value.  */
546
      index = TREE_INT_CST_LOW (idx);
547
      if (!host_integerp (idx, 1) || index >= elements)
548
        {
549
          index &= elements - 1;
550
          idx = build_int_cst (TREE_TYPE (idx), index);
551
        }
552
 
553
      /* When lowering a vector statement sequence do some easy
554
         simplification by looking through intermediate vector results.  */
555
      if (TREE_CODE (vect) == SSA_NAME)
556
        {
557
          gimple def_stmt = SSA_NAME_DEF_STMT (vect);
558
          if (is_gimple_assign (def_stmt)
559
              && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
560
                  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
561
            vect = gimple_assign_rhs1 (def_stmt);
562
        }
563
 
564
      if (TREE_CODE (vect) == VECTOR_CST)
565
        {
566
          unsigned i;
567
          tree vals = TREE_VECTOR_CST_ELTS (vect);
568
          for (i = 0; vals; vals = TREE_CHAIN (vals), ++i)
569
            if (i == index)
570
               return TREE_VALUE (vals);
571
          return build_zero_cst (vect_elt_type);
572
        }
573
      else if (TREE_CODE (vect) == CONSTRUCTOR)
574
        {
575
          unsigned i;
576
          tree elt_i, elt_v;
577
 
578
          FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (vect), i, elt_i, elt_v)
579
            if (operand_equal_p (elt_i, idx, 0))
580
              return elt_v;
581
          return build_zero_cst (vect_elt_type);
582
        }
583
      else
584
        {
585
          tree size = TYPE_SIZE (vect_elt_type);
586
          tree pos = fold_build2 (MULT_EXPR, TREE_TYPE (idx), idx, size);
587
          return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
588
        }
589
    }
590
 
591
  if (!ptmpvec)
592
    tmpvec = create_tmp_var (vect_type, "vectmp");
593
  else if (!*ptmpvec)
594
    tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
595
  else
596
    {
597
      tmpvec = *ptmpvec;
598
      need_asgn = false;
599
    }
600
 
601
  if (need_asgn)
602
    {
603
      TREE_ADDRESSABLE (tmpvec) = 1;
604
      asgn = gimple_build_assign (tmpvec, vect);
605
      gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
606
    }
607
 
608
  arraytype = build_array_type_nelts (vect_elt_type, elements);
609
  return build4 (ARRAY_REF, vect_elt_type,
610
                 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
611
                 idx, NULL_TREE, NULL_TREE);
612
}
613
 
614
/* Check if VEC_PERM_EXPR within the given setting is supported
615
   by hardware, or lower it piecewise.
616
 
617
   When VEC_PERM_EXPR has the same first and second operands:
618
   VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
619
   {v0[mask[0]], v0[mask[1]], ...}
620
   MASK and V0 must have the same number of elements.
621
 
622
   Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
623
   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
624
   V0 and V1 must have the same type.  MASK, V0, V1 must have the
625
   same number of arguments.  */
626
 
627
static void
628
lower_vec_perm (gimple_stmt_iterator *gsi)
629
{
630
  gimple stmt = gsi_stmt (*gsi);
631
  tree mask = gimple_assign_rhs3 (stmt);
632
  tree vec0 = gimple_assign_rhs1 (stmt);
633
  tree vec1 = gimple_assign_rhs2 (stmt);
634
  tree vect_type = TREE_TYPE (vec0);
635
  tree mask_type = TREE_TYPE (mask);
636
  tree vect_elt_type = TREE_TYPE (vect_type);
637
  tree mask_elt_type = TREE_TYPE (mask_type);
638
  unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
639
  VEC(constructor_elt,gc) *v;
640
  tree constr, t, si, i_val;
641
  tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
642
  bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
643
  location_t loc = gimple_location (gsi_stmt (*gsi));
644
  unsigned i;
645
 
646
  if (TREE_CODE (mask) == VECTOR_CST)
647
    {
648
      unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);
649
      tree vals = TREE_VECTOR_CST_ELTS (mask);
650
 
651
      for (i = 0; i < elements; ++i, vals = TREE_CHAIN (vals))
652
        sel_int[i] = TREE_INT_CST_LOW (TREE_VALUE (vals)) & (2 * elements - 1);
653
 
654
      if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
655
        return;
656
    }
657
  else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
658
    return;
659
 
660
  warning_at (loc, OPT_Wvector_operation_performance,
661
              "vector shuffling operation will be expanded piecewise");
662
 
663
  v = VEC_alloc (constructor_elt, gc, elements);
664
  for (i = 0; i < elements; i++)
665
    {
666
      si = size_int (i);
667
      i_val = vector_element (gsi, mask, si, &masktmp);
668
 
669
      if (TREE_CODE (i_val) == INTEGER_CST)
670
        {
671
          unsigned HOST_WIDE_INT index;
672
 
673
          index = TREE_INT_CST_LOW (i_val);
674
          if (!host_integerp (i_val, 1) || index >= elements)
675
            i_val = build_int_cst (mask_elt_type, index & (elements - 1));
676
 
677
          if (two_operand_p && (index & elements) != 0)
678
            t = vector_element (gsi, vec1, i_val, &vec1tmp);
679
          else
680
            t = vector_element (gsi, vec0, i_val, &vec0tmp);
681
 
682
          t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
683
                                        true, GSI_SAME_STMT);
684
        }
685
      else
686
        {
687
          tree cond = NULL_TREE, v0_val;
688
 
689
          if (two_operand_p)
690
            {
691
              cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
692
                                  build_int_cst (mask_elt_type, elements));
693
              cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
694
                                               true, GSI_SAME_STMT);
695
            }
696
 
697
          i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
698
                               build_int_cst (mask_elt_type, elements - 1));
699
          i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
700
                                            true, GSI_SAME_STMT);
701
 
702
          v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
703
          v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
704
                                             true, GSI_SAME_STMT);
705
 
706
          if (two_operand_p)
707
            {
708
              tree v1_val;
709
 
710
              v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
711
              v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
712
                                                 true, GSI_SAME_STMT);
713
 
714
              cond = fold_build2 (EQ_EXPR, boolean_type_node,
715
                                  cond, build_zero_cst (mask_elt_type));
716
              cond = fold_build3 (COND_EXPR, vect_elt_type,
717
                                  cond, v0_val, v1_val);
718
              t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
719
                                            true, GSI_SAME_STMT);
720
            }
721
          else
722
            t = v0_val;
723
        }
724
 
725
      CONSTRUCTOR_APPEND_ELT (v, si, t);
726
    }
727
 
728
  constr = build_constructor (vect_type, v);
729
  gimple_assign_set_rhs_from_tree (gsi, constr);
730
  update_stmt (gsi_stmt (*gsi));
731
}
732
 
733
/* Process one statement.  If we identify a vector operation, expand it.  */
734
 
735
static void
736
expand_vector_operations_1 (gimple_stmt_iterator *gsi)
737
{
738
  gimple stmt = gsi_stmt (*gsi);
739
  tree lhs, rhs1, rhs2 = NULL, type, compute_type;
740
  enum tree_code code;
741
  enum machine_mode compute_mode;
742
  optab op = NULL;
743
  enum gimple_rhs_class rhs_class;
744
  tree new_rhs;
745
 
746
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
747
    return;
748
 
749
  code = gimple_assign_rhs_code (stmt);
750
  rhs_class = get_gimple_rhs_class (code);
751
  lhs = gimple_assign_lhs (stmt);
752
 
753
  if (code == VEC_PERM_EXPR)
754
    {
755
      lower_vec_perm (gsi);
756
      return;
757
    }
758
 
759
  if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
760
    return;
761
 
762
  rhs1 = gimple_assign_rhs1 (stmt);
763
  type = gimple_expr_type (stmt);
764
  if (rhs_class == GIMPLE_BINARY_RHS)
765
    rhs2 = gimple_assign_rhs2 (stmt);
766
 
767
  if (TREE_CODE (type) != VECTOR_TYPE)
768
    return;
769
 
770
  if (code == NOP_EXPR
771
      || code == FLOAT_EXPR
772
      || code == FIX_TRUNC_EXPR
773
      || code == VIEW_CONVERT_EXPR)
774
    return;
775
 
776
  gcc_assert (code != CONVERT_EXPR);
777
 
778
  /* The signedness is determined from input argument.  */
779
  if (code == VEC_UNPACK_FLOAT_HI_EXPR
780
      || code == VEC_UNPACK_FLOAT_LO_EXPR)
781
    type = TREE_TYPE (rhs1);
782
 
783
  /* Choose between vector shift/rotate by vector and vector shift/rotate by
784
     scalar */
785
  if (code == LSHIFT_EXPR
786
      || code == RSHIFT_EXPR
787
      || code == LROTATE_EXPR
788
      || code == RROTATE_EXPR)
789
    {
790
      optab opv;
791
 
792
      /* Check whether we have vector <op> {x,x,x,x} where x
793
         could be a scalar variable or a constant.  Transform
794
         vector <op> {x,x,x,x} ==> vector <op> scalar.  */
795
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
796
        {
797
          tree first;
798
          gimple def_stmt;
799
 
800
          if ((TREE_CODE (rhs2) == VECTOR_CST
801
               && (first = uniform_vector_p (rhs2)) != NULL_TREE)
802
              || (TREE_CODE (rhs2) == SSA_NAME
803
                  && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
804
                  && gimple_assign_single_p (def_stmt)
805
                  && (first = uniform_vector_p
806
                      (gimple_assign_rhs1 (def_stmt))) != NULL_TREE))
807
            {
808
              gimple_assign_set_rhs2 (stmt, first);
809
              update_stmt (stmt);
810
              rhs2 = first;
811
            }
812
        }
813
 
814
      opv = optab_for_tree_code (code, type, optab_vector);
815
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
816
        op = opv;
817
      else
818
        {
819
          op = optab_for_tree_code (code, type, optab_scalar);
820
 
821
          /* The rtl expander will expand vector/scalar as vector/vector
822
             if necessary.  Don't bother converting the stmt here.  */
823
          if (optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing
824
              && optab_handler (opv, TYPE_MODE (type)) != CODE_FOR_nothing)
825
            return;
826
        }
827
    }
828
  else
829
    op = optab_for_tree_code (code, type, optab_default);
830
 
831
  /* For widening/narrowing vector operations, the relevant type is of the
832
     arguments, not the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
833
     calculated in the same way above.  */
834
  if (code == WIDEN_SUM_EXPR
835
      || code == VEC_WIDEN_MULT_HI_EXPR
836
      || code == VEC_WIDEN_MULT_LO_EXPR
837
      || code == VEC_UNPACK_HI_EXPR
838
      || code == VEC_UNPACK_LO_EXPR
839
      || code == VEC_PACK_TRUNC_EXPR
840
      || code == VEC_PACK_SAT_EXPR
841
      || code == VEC_PACK_FIX_TRUNC_EXPR
842
      || code == VEC_WIDEN_LSHIFT_HI_EXPR
843
      || code == VEC_WIDEN_LSHIFT_LO_EXPR)
844
    type = TREE_TYPE (rhs1);
845
 
846
  /* Optabs will try converting a negation into a subtraction, so
847
     look for it as well.  TODO: negation of floating-point vectors
848
     might be turned into an exclusive OR toggling the sign bit.  */
849
  if (op == NULL
850
      && code == NEGATE_EXPR
851
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
852
    op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
853
 
854
  /* For very wide vectors, try using a smaller vector mode.  */
855
  compute_type = type;
856
  if (!VECTOR_MODE_P (TYPE_MODE (type)) && op)
857
    {
858
      tree vector_compute_type
859
        = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op,
860
                                       TYPE_SATURATING (TREE_TYPE (type)));
861
      if (vector_compute_type != NULL_TREE
862
          && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
863
              < TYPE_VECTOR_SUBPARTS (compute_type))
864
          && (optab_handler (op, TYPE_MODE (vector_compute_type))
865
              != CODE_FOR_nothing))
866
        compute_type = vector_compute_type;
867
    }
868
 
869
  /* If we are breaking a BLKmode vector into smaller pieces,
870
     type_for_widest_vector_mode has already looked into the optab,
871
     so skip these checks.  */
872
  if (compute_type == type)
873
    {
874
      compute_mode = TYPE_MODE (compute_type);
875
      if (VECTOR_MODE_P (compute_mode)
876
          && op != NULL
877
          && optab_handler (op, compute_mode) != CODE_FOR_nothing)
878
        return;
879
      else
880
        /* There is no operation in hardware, so fall back to scalars.  */
881
        compute_type = TREE_TYPE (type);
882
    }
883
 
884
  gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
885
  new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
886
 
887
  /* Leave expression untouched for later expansion.  */
888
  if (new_rhs == NULL_TREE)
889
    return;
890
 
891
  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
892
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
893
                               new_rhs);
894
 
895
  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree. One
896
     way to do it is change expand_vector_operation and its callees to
897
     return a tree_code, RHS1 and RHS2 instead of a tree. */
898
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
899
  update_stmt (gsi_stmt (*gsi));
900
}
901
 
902
/* Use this to lower vector operations introduced by the vectorizer,
903
   if it may need the bit-twiddling tricks implemented in this file.  */
904
 
905
static bool
906
gate_expand_vector_operations_ssa (void)
907
{
908
  return optimize == 0;
909
}
910
 
911
static unsigned int
912
expand_vector_operations (void)
913
{
914
  gimple_stmt_iterator gsi;
915
  basic_block bb;
916
  bool cfg_changed = false;
917
 
918
  FOR_EACH_BB (bb)
919
    {
920
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
921
        {
922
          expand_vector_operations_1 (&gsi);
923
          /* ???  If we do not cleanup EH then we will ICE in
924
             verification.  But in reality we have created wrong-code
925
             as we did not properly transition EH info and edges to
926
             the piecewise computations.  */
927
          if (maybe_clean_eh_stmt (gsi_stmt (gsi))
928
              && gimple_purge_dead_eh_edges (bb))
929
            cfg_changed = true;
930
        }
931
    }
932
 
933
  return cfg_changed ? TODO_cleanup_cfg : 0;
934
}
935
 
936
struct gimple_opt_pass pass_lower_vector =
937
{
938
 {
939
  GIMPLE_PASS,
940
  "veclower",                           /* name */
941
  gate_expand_vector_operations_ssa,    /* gate */
942
  expand_vector_operations,             /* execute */
943
  NULL,                                 /* sub */
944
  NULL,                                 /* next */
945
  0,                                     /* static_pass_number */
946
  TV_NONE,                              /* tv_id */
947
  PROP_cfg,                             /* properties_required */
948
  0,                                     /* properties_provided */
949
  0,                                     /* properties_destroyed */
950
  0,                                     /* todo_flags_start */
951
  TODO_update_ssa                       /* todo_flags_finish */
952
    | TODO_verify_ssa
953
    | TODO_verify_stmts | TODO_verify_flow
954
    | TODO_cleanup_cfg
955
 }
956
};
957
 
958
struct gimple_opt_pass pass_lower_vector_ssa =
959
{
960
 {
961
  GIMPLE_PASS,
962
  "veclower2",                          /* name */
963
  0,                                     /* gate */
964
  expand_vector_operations,             /* execute */
965
  NULL,                                 /* sub */
966
  NULL,                                 /* next */
967
  0,                                     /* static_pass_number */
968
  TV_NONE,                              /* tv_id */
969
  PROP_cfg,                             /* properties_required */
970
  0,                                     /* properties_provided */
971
  0,                                     /* properties_destroyed */
972
  0,                                     /* todo_flags_start */
973
  TODO_update_ssa                       /* todo_flags_finish */
974
    | TODO_verify_ssa
975
    | TODO_verify_stmts | TODO_verify_flow
976
    | TODO_cleanup_cfg
977
 }
978
};
979
 
980
#include "gt-tree-vect-generic.h"

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.