OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-old/] [gcc-4.2.2/] [gcc/] [tree-vect-generic.c] - Blame information for rev 857

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 38 julius
/* Lower vector operations to scalar operations.
2
   Copyright (C) 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3
 
4
This file is part of GCC.
5
 
6
GCC is free software; you can redistribute it and/or modify it
7
under the terms of the GNU General Public License as published by the
8
Free Software Foundation; either version 3, or (at your option) any
9
later version.
10
 
11
GCC is distributed in the hope that it will be useful, but WITHOUT
12
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14
for more details.
15
 
16
You should have received a copy of the GNU General Public License
17
along with GCC; see the file COPYING3.  If not see
18
<http://www.gnu.org/licenses/>.  */
19
 
20
#include "config.h"
21
#include "system.h"
22
#include "coretypes.h"
23
#include "tree.h"
24
#include "tm.h"
25
#include "rtl.h"
26
#include "expr.h"
27
#include "insn-codes.h"
28
#include "diagnostic.h"
29
#include "optabs.h"
30
#include "machmode.h"
31
#include "langhooks.h"
32
#include "tree-flow.h"
33
#include "tree-gimple.h"
34
#include "tree-iterator.h"
35
#include "tree-pass.h"
36
#include "flags.h"
37
#include "ggc.h"
38
 
39
 
40
/* Build a constant of type TYPE, made of VALUE's bits replicated
41
   every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */
42
static tree
43
build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
44
{
45
  int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
46
  int n = HOST_BITS_PER_WIDE_INT / width;
47
  unsigned HOST_WIDE_INT low, high, mask;
48
  tree ret;
49
 
50
  gcc_assert (n);
51
 
52
  if (width == HOST_BITS_PER_WIDE_INT)
53
    low = value;
54
  else
55
    {
56
      mask = ((HOST_WIDE_INT)1 << width) - 1;
57
      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
58
    }
59
 
60
  if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
61
    low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
62
  else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
63
    high = 0;
64
  else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT)
65
    high = low;
66
  else
67
    gcc_unreachable ();
68
 
69
  ret = build_int_cst_wide (type, low, high);
70
  return ret;
71
}
72
 
73
static GTY(()) tree vector_inner_type;
74
static GTY(()) tree vector_last_type;
75
static GTY(()) int vector_last_nunits;
76
 
77
/* Return a suitable vector types made of SUBPARTS units each of mode
78
   "word_mode" (the global variable).  */
79
static tree
80
build_word_mode_vector_type (int nunits)
81
{
82
  if (!vector_inner_type)
83
    vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
84
  else if (vector_last_nunits == nunits)
85
    {
86
      gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
87
      return vector_last_type;
88
    }
89
 
90
  /* We build a new type, but we canonicalize it nevertheless,
91
     because it still saves some memory.  */
92
  vector_last_nunits = nunits;
93
  vector_last_type = type_hash_canon (nunits,
94
                                      build_vector_type (vector_inner_type,
95
                                                         nunits));
96
  return vector_last_type;
97
}
98
 
99
typedef tree (*elem_op_func) (block_stmt_iterator *,
100
                              tree, tree, tree, tree, tree, enum tree_code);
101
 
102
static inline tree
103
tree_vec_extract (block_stmt_iterator *bsi, tree type,
104
                  tree t, tree bitsize, tree bitpos)
105
{
106
  if (bitpos)
107
    return gimplify_build3 (bsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
108
  else
109
    return gimplify_build1 (bsi, VIEW_CONVERT_EXPR, type, t);
110
}
111
 
112
static tree
113
do_unop (block_stmt_iterator *bsi, tree inner_type, tree a,
114
         tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
115
         enum tree_code code)
116
{
117
  a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
118
  return gimplify_build1 (bsi, code, inner_type, a);
119
}
120
 
121
static tree
122
do_binop (block_stmt_iterator *bsi, tree inner_type, tree a, tree b,
123
          tree bitpos, tree bitsize, enum tree_code code)
124
{
125
  a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
126
  b = tree_vec_extract (bsi, inner_type, b, bitsize, bitpos);
127
  return gimplify_build2 (bsi, code, inner_type, a, b);
128
}
129
 
130
/* Expand vector addition to scalars.  This does bit twiddling
131
   in order to increase parallelism:
132
 
133
   a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
134
           (a ^ b) & 0x80808080
135
 
136
   a - b =  (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
137
            (a ^ ~b) & 0x80808080
138
 
139
   -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
140
 
141
   This optimization should be done only if 4 vector items or more
142
   fit into a word.  */
143
static tree
144
do_plus_minus (block_stmt_iterator *bsi, tree word_type, tree a, tree b,
145
               tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
146
               enum tree_code code)
147
{
148
  tree inner_type = TREE_TYPE (TREE_TYPE (a));
149
  unsigned HOST_WIDE_INT max;
150
  tree low_bits, high_bits, a_low, b_low, result_low, signs;
151
 
152
  max = GET_MODE_MASK (TYPE_MODE (inner_type));
153
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
154
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
155
 
156
  a = tree_vec_extract (bsi, word_type, a, bitsize, bitpos);
157
  b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);
158
 
159
  signs = gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, a, b);
160
  b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
161
  if (code == PLUS_EXPR)
162
    a_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, a, low_bits);
163
  else
164
    {
165
      a_low = gimplify_build2 (bsi, BIT_IOR_EXPR, word_type, a, high_bits);
166
      signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, signs);
167
    }
168
 
169
  signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
170
  result_low = gimplify_build2 (bsi, code, word_type, a_low, b_low);
171
  return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
172
}
173
 
174
static tree
175
do_negate (block_stmt_iterator *bsi, tree word_type, tree b,
176
           tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
177
           tree bitsize ATTRIBUTE_UNUSED,
178
           enum tree_code code ATTRIBUTE_UNUSED)
179
{
180
  tree inner_type = TREE_TYPE (TREE_TYPE (b));
181
  HOST_WIDE_INT max;
182
  tree low_bits, high_bits, b_low, result_low, signs;
183
 
184
  max = GET_MODE_MASK (TYPE_MODE (inner_type));
185
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
186
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
187
 
188
  b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);
189
 
190
  b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
191
  signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, b);
192
  signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
193
  result_low = gimplify_build2 (bsi, MINUS_EXPR, word_type, high_bits, b_low);
194
  return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
195
}
196
 
197
/* Expand a vector operation to scalars, by using many operations
198
   whose type is the vector type's inner type.  */
199
static tree
200
expand_vector_piecewise (block_stmt_iterator *bsi, elem_op_func f,
201
                         tree type, tree inner_type,
202
                         tree a, tree b, enum tree_code code)
203
{
204
  VEC(constructor_elt,gc) *v;
205
  tree part_width = TYPE_SIZE (inner_type);
206
  tree index = bitsize_int (0);
207
  int nunits = TYPE_VECTOR_SUBPARTS (type);
208
  int delta = tree_low_cst (part_width, 1)
209
              / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
210
  int i;
211
 
212
  v = VEC_alloc(constructor_elt, gc, (nunits + delta - 1) / delta);
213
  for (i = 0; i < nunits;
214
       i += delta, index = int_const_binop (PLUS_EXPR, index, part_width, 0))
215
    {
216
      tree result = f (bsi, inner_type, a, b, index, part_width, code);
217
      constructor_elt *ce = VEC_quick_push (constructor_elt, v, NULL);
218
      ce->index = NULL_TREE;
219
      ce->value = result;
220
    }
221
 
222
  return build_constructor (type, v);
223
}
224
 
225
/* Expand a vector operation to scalars with the freedom to use
226
   a scalar integer type, or to use a different size for the items
227
   in the vector type.  */
228
static tree
229
expand_vector_parallel (block_stmt_iterator *bsi, elem_op_func f, tree type,
230
                        tree a, tree b,
231
                        enum tree_code code)
232
{
233
  tree result, compute_type;
234
  enum machine_mode mode;
235
  int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;
236
 
237
  /* We have three strategies.  If the type is already correct, just do
238
     the operation an element at a time.  Else, if the vector is wider than
239
     one word, do it a word at a time; finally, if the vector is smaller
240
     than one word, do it as a scalar.  */
241
  if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
242
     return expand_vector_piecewise (bsi, f,
243
                                     type, TREE_TYPE (type),
244
                                     a, b, code);
245
  else if (n_words > 1)
246
    {
247
      tree word_type = build_word_mode_vector_type (n_words);
248
      result = expand_vector_piecewise (bsi, f,
249
                                        word_type, TREE_TYPE (word_type),
250
                                        a, b, code);
251
      result = gimplify_val (bsi, word_type, result);
252
    }
253
  else
254
    {
255
      /* Use a single scalar operation with a mode no wider than word_mode.  */
256
      mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
257
      compute_type = lang_hooks.types.type_for_mode (mode, 1);
258
      result = f (bsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
259
    }
260
 
261
  return result;
262
}
263
 
264
/* Expand a vector operation to scalars; for integer types we can use
265
   special bit twiddling tricks to do the sums a word at a time, using
266
   function F_PARALLEL instead of F.  These tricks are done only if
267
   they can process at least four items, that is, only if the vector
268
   holds at least four items and if a word can hold four items.  */
269
static tree
270
expand_vector_addition (block_stmt_iterator *bsi,
271
                        elem_op_func f, elem_op_func f_parallel,
272
                        tree type, tree a, tree b, enum tree_code code)
273
{
274
  int parts_per_word = UNITS_PER_WORD
275
                       / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);
276
 
277
  if (INTEGRAL_TYPE_P (TREE_TYPE (type))
278
      && parts_per_word >= 4
279
      && TYPE_VECTOR_SUBPARTS (type) >= 4)
280
    return expand_vector_parallel (bsi, f_parallel,
281
                                   type, a, b, code);
282
  else
283
    return expand_vector_piecewise (bsi, f,
284
                                    type, TREE_TYPE (type),
285
                                    a, b, code);
286
}
287
 
288
static tree
289
expand_vector_operation (block_stmt_iterator *bsi, tree type, tree compute_type,
290
                         tree rhs, enum tree_code code)
291
{
292
  enum machine_mode compute_mode = TYPE_MODE (compute_type);
293
 
294
  /* If the compute mode is not a vector mode (hence we are not decomposing
295
     a BLKmode vector to smaller, hardware-supported vectors), we may want
296
     to expand the operations in parallel.  */
297
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
298
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT)
299
    switch (code)
300
      {
301
      case PLUS_EXPR:
302
      case MINUS_EXPR:
303
        if (!TYPE_OVERFLOW_TRAPS (type))
304
          return expand_vector_addition (bsi, do_binop, do_plus_minus, type,
305
                                         TREE_OPERAND (rhs, 0),
306
                                         TREE_OPERAND (rhs, 1), code);
307
        break;
308
 
309
      case NEGATE_EXPR:
310
        if (!TYPE_OVERFLOW_TRAPS (type))
311
          return expand_vector_addition (bsi, do_unop, do_negate, type,
312
                                         TREE_OPERAND (rhs, 0),
313
                                         NULL_TREE, code);
314
        break;
315
 
316
      case BIT_AND_EXPR:
317
      case BIT_IOR_EXPR:
318
      case BIT_XOR_EXPR:
319
        return expand_vector_parallel (bsi, do_binop, type,
320
                                       TREE_OPERAND (rhs, 0),
321
                                       TREE_OPERAND (rhs, 1), code);
322
 
323
      case BIT_NOT_EXPR:
324
        return expand_vector_parallel (bsi, do_unop, type,
325
                                       TREE_OPERAND (rhs, 0),
326
                                       NULL_TREE, code);
327
 
328
      default:
329
        break;
330
      }
331
 
332
  if (TREE_CODE_CLASS (code) == tcc_unary)
333
    return expand_vector_piecewise (bsi, do_unop, type, compute_type,
334
                                    TREE_OPERAND (rhs, 0),
335
                                    NULL_TREE, code);
336
  else
337
    return expand_vector_piecewise (bsi, do_binop, type, compute_type,
338
                                    TREE_OPERAND (rhs, 0),
339
                                    TREE_OPERAND (rhs, 1), code);
340
}
341
 
342
/* Return a type for the widest vector mode whose components are of mode
343
   INNER_MODE, or NULL_TREE if none is found.  */
344
static tree
345
type_for_widest_vector_mode (enum machine_mode inner_mode, optab op)
346
{
347
  enum machine_mode best_mode = VOIDmode, mode;
348
  int best_nunits = 0;
349
 
350
  if (SCALAR_FLOAT_MODE_P (inner_mode))
351
    mode = MIN_MODE_VECTOR_FLOAT;
352
  else
353
    mode = MIN_MODE_VECTOR_INT;
354
 
355
  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
356
    if (GET_MODE_INNER (mode) == inner_mode
357
        && GET_MODE_NUNITS (mode) > best_nunits
358
        && op->handlers[mode].insn_code != CODE_FOR_nothing)
359
      best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
360
 
361
  if (best_mode == VOIDmode)
362
    return NULL_TREE;
363
  else
364
    return lang_hooks.types.type_for_mode (best_mode, 1);
365
}
366
 
367
/* Process one statement.  If we identify a vector operation, expand it.  */
368
 
369
static void
370
expand_vector_operations_1 (block_stmt_iterator *bsi)
371
{
372
  tree stmt = bsi_stmt (*bsi);
373
  tree *p_lhs, *p_rhs, lhs, rhs, type, compute_type;
374
  enum tree_code code;
375
  enum machine_mode compute_mode;
376
  optab op;
377
 
378
  switch (TREE_CODE (stmt))
379
    {
380
    case RETURN_EXPR:
381
      stmt = TREE_OPERAND (stmt, 0);
382
      if (!stmt || TREE_CODE (stmt) != MODIFY_EXPR)
383
        return;
384
 
385
      /* FALLTHRU */
386
 
387
    case MODIFY_EXPR:
388
      p_lhs = &TREE_OPERAND (stmt, 0);
389
      p_rhs = &TREE_OPERAND (stmt, 1);
390
      lhs = *p_lhs;
391
      rhs = *p_rhs;
392
      break;
393
 
394
    default:
395
      return;
396
    }
397
 
398
  type = TREE_TYPE (rhs);
399
  if (TREE_CODE (type) != VECTOR_TYPE)
400
    return;
401
 
402
  code = TREE_CODE (rhs);
403
  if (TREE_CODE_CLASS (code) != tcc_unary
404
      && TREE_CODE_CLASS (code) != tcc_binary)
405
    return;
406
 
407
  if (code == NOP_EXPR || code == VIEW_CONVERT_EXPR)
408
    return;
409
 
410
  gcc_assert (code != CONVERT_EXPR);
411
  op = optab_for_tree_code (code, type);
412
 
413
  /* For widening vector operations, the relevant type is of the arguments,
414
     not the widened result.  */
415
  if (code == WIDEN_SUM_EXPR)
416
    type = TREE_TYPE (TREE_OPERAND (rhs, 0));
417
 
418
  /* Optabs will try converting a negation into a subtraction, so
419
     look for it as well.  TODO: negation of floating-point vectors
420
     might be turned into an exclusive OR toggling the sign bit.  */
421
  if (op == NULL
422
      && code == NEGATE_EXPR
423
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
424
    op = optab_for_tree_code (MINUS_EXPR, type);
425
 
426
  /* For very wide vectors, try using a smaller vector mode.  */
427
  compute_type = type;
428
  if (TYPE_MODE (type) == BLKmode && op)
429
    {
430
      tree vector_compute_type
431
        = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op);
432
      if (vector_compute_type != NULL_TREE)
433
        compute_type = vector_compute_type;
434
    }
435
 
436
  /* If we are breaking a BLKmode vector into smaller pieces,
437
     type_for_widest_vector_mode has already looked into the optab,
438
     so skip these checks.  */
439
  if (compute_type == type)
440
    {
441
      compute_mode = TYPE_MODE (compute_type);
442
      if ((GET_MODE_CLASS (compute_mode) == MODE_VECTOR_INT
443
           || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FLOAT)
444
          && op != NULL
445
          && op->handlers[compute_mode].insn_code != CODE_FOR_nothing)
446
        return;
447
      else
448
        /* There is no operation in hardware, so fall back to scalars.  */
449
        compute_type = TREE_TYPE (type);
450
    }
451
 
452
  gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
453
  rhs = expand_vector_operation (bsi, type, compute_type, rhs, code);
454
  if (lang_hooks.types_compatible_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
455
    *p_rhs = rhs;
456
  else
457
    *p_rhs = gimplify_build1 (bsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), rhs);
458
 
459
  mark_stmt_modified (bsi_stmt (*bsi));
460
}
461
 
462
/* Use this to lower vector operations introduced by the vectorizer,
463
   if it may need the bit-twiddling tricks implemented in this file.  */
464
 
465
static bool
466
gate_expand_vector_operations (void)
467
{
468
  return flag_tree_vectorize != 0;
469
}
470
 
471
static unsigned int
472
expand_vector_operations (void)
473
{
474
  block_stmt_iterator bsi;
475
  basic_block bb;
476
 
477
  FOR_EACH_BB (bb)
478
    {
479
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
480
        {
481
          expand_vector_operations_1 (&bsi);
482
          update_stmt_if_modified (bsi_stmt (bsi));
483
        }
484
    }
485
  return 0;
486
}
487
 
488
struct tree_opt_pass pass_lower_vector =
489
{
490
  "veclower",                           /* name */
491
  0,                                     /* gate */
492
  expand_vector_operations,             /* execute */
493
  NULL,                                 /* sub */
494
  NULL,                                 /* next */
495
  0,                                     /* static_pass_number */
496
  0,                                     /* tv_id */
497
  PROP_cfg,                             /* properties_required */
498
  0,                                     /* properties_provided */
499
  0,                                     /* properties_destroyed */
500
  0,                                     /* todo_flags_start */
501
  TODO_dump_func | TODO_ggc_collect
502
    | TODO_verify_stmts,                /* todo_flags_finish */
503
 
504
};
505
 
506
struct tree_opt_pass pass_lower_vector_ssa =
507
{
508
  "veclower2",                          /* name */
509
  gate_expand_vector_operations,        /* gate */
510
  expand_vector_operations,             /* execute */
511
  NULL,                                 /* sub */
512
  NULL,                                 /* next */
513
  0,                                     /* static_pass_number */
514
  0,                                     /* tv_id */
515
  PROP_cfg,                             /* properties_required */
516
  0,                                     /* properties_provided */
517
  0,                                     /* properties_destroyed */
518
  0,                                     /* todo_flags_start */
519
  TODO_dump_func | TODO_update_ssa      /* todo_flags_finish */
520
    | TODO_verify_ssa
521
    | TODO_verify_stmts | TODO_verify_flow,
522
 
523
};
524
 
525
#include "gt-tree-vect-generic.h"

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.