OpenCores
URL https://opencores.org/ocsvn/scarts/scarts/trunk

Subversion Repositories scarts

[/] [scarts/] [trunk/] [toolchain/] [scarts-gcc/] [gcc-4.1.1/] [gcc/] [tree-vect-generic.c] - Blame information for rev 20

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 12 jlechner
/* Lower vector operations to scalar operations.
2
   Copyright (C) 2004, 2005 Free Software Foundation, Inc.
3
 
4
This file is part of GCC.
5
 
6
GCC is free software; you can redistribute it and/or modify it
7
under the terms of the GNU General Public License as published by the
8
Free Software Foundation; either version 2, or (at your option) any
9
later version.
10
 
11
GCC is distributed in the hope that it will be useful, but WITHOUT
12
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14
for more details.
15
 
16
You should have received a copy of the GNU General Public License
17
along with GCC; see the file COPYING.  If not, write to the Free
18
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19
02110-1301, USA.  */
20
 
21
#include "config.h"
22
#include "system.h"
23
#include "coretypes.h"
24
#include "tree.h"
25
#include "tm.h"
26
#include "rtl.h"
27
#include "expr.h"
28
#include "insn-codes.h"
29
#include "diagnostic.h"
30
#include "optabs.h"
31
#include "machmode.h"
32
#include "langhooks.h"
33
#include "tree-flow.h"
34
#include "tree-gimple.h"
35
#include "tree-iterator.h"
36
#include "tree-pass.h"
37
#include "flags.h"
38
#include "ggc.h"
39
 
40
 
41
/* Build a constant of type TYPE, made of VALUE's bits replicated
42
   every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */
43
static tree
44
build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
45
{
46
  int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
47
  int n = HOST_BITS_PER_WIDE_INT / width;
48
  unsigned HOST_WIDE_INT low, high, mask;
49
  tree ret;
50
 
51
  gcc_assert (n);
52
 
53
  if (width == HOST_BITS_PER_WIDE_INT)
54
    low = value;
55
  else
56
    {
57
      mask = ((HOST_WIDE_INT)1 << width) - 1;
58
      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
59
    }
60
 
61
  if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
62
    low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
63
  else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
64
    high = 0;
65
  else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT)
66
    high = low;
67
  else
68
    gcc_unreachable ();
69
 
70
  ret = build_int_cst_wide (type, low, high);
71
  return ret;
72
}
73
 
74
static GTY(()) tree vector_inner_type;
75
static GTY(()) tree vector_last_type;
76
static GTY(()) int vector_last_nunits;
77
 
78
/* Return a suitable vector types made of SUBPARTS units each of mode
79
   "word_mode" (the global variable).  */
80
static tree
81
build_word_mode_vector_type (int nunits)
82
{
83
  if (!vector_inner_type)
84
    vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
85
  else if (vector_last_nunits == nunits)
86
    {
87
      gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
88
      return vector_last_type;
89
    }
90
 
91
  /* We build a new type, but we canonicalize it nevertheless,
92
     because it still saves some memory.  */
93
  vector_last_nunits = nunits;
94
  vector_last_type = type_hash_canon (nunits,
95
                                      build_vector_type (vector_inner_type,
96
                                                         nunits));
97
  return vector_last_type;
98
}
99
 
100
typedef tree (*elem_op_func) (block_stmt_iterator *,
101
                              tree, tree, tree, tree, tree, enum tree_code);
102
 
103
static inline tree
104
tree_vec_extract (block_stmt_iterator *bsi, tree type,
105
                  tree t, tree bitsize, tree bitpos)
106
{
107
  if (bitpos)
108
    return gimplify_build3 (bsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
109
  else
110
    return gimplify_build1 (bsi, VIEW_CONVERT_EXPR, type, t);
111
}
112
 
113
static tree
114
do_unop (block_stmt_iterator *bsi, tree inner_type, tree a,
115
         tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
116
         enum tree_code code)
117
{
118
  a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
119
  return gimplify_build1 (bsi, code, inner_type, a);
120
}
121
 
122
static tree
123
do_binop (block_stmt_iterator *bsi, tree inner_type, tree a, tree b,
124
          tree bitpos, tree bitsize, enum tree_code code)
125
{
126
  a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
127
  b = tree_vec_extract (bsi, inner_type, b, bitsize, bitpos);
128
  return gimplify_build2 (bsi, code, inner_type, a, b);
129
}
130
 
131
/* Expand vector addition to scalars.  This does bit twiddling
132
   in order to increase parallelism:
133
 
134
   a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
135
           (a ^ b) & 0x80808080
136
 
137
   a - b =  (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
138
            (a ^ ~b) & 0x80808080
139
 
140
   -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
141
 
142
   This optimization should be done only if 4 vector items or more
143
   fit into a word.  */
144
static tree
145
do_plus_minus (block_stmt_iterator *bsi, tree word_type, tree a, tree b,
146
               tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
147
               enum tree_code code)
148
{
149
  tree inner_type = TREE_TYPE (TREE_TYPE (a));
150
  unsigned HOST_WIDE_INT max;
151
  tree low_bits, high_bits, a_low, b_low, result_low, signs;
152
 
153
  max = GET_MODE_MASK (TYPE_MODE (inner_type));
154
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
155
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
156
 
157
  a = tree_vec_extract (bsi, word_type, a, bitsize, bitpos);
158
  b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);
159
 
160
  signs = gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, a, b);
161
  b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
162
  if (code == PLUS_EXPR)
163
    a_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, a, low_bits);
164
  else
165
    {
166
      a_low = gimplify_build2 (bsi, BIT_IOR_EXPR, word_type, a, high_bits);
167
      signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, signs);
168
    }
169
 
170
  signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
171
  result_low = gimplify_build2 (bsi, code, word_type, a_low, b_low);
172
  return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
173
}
174
 
175
static tree
176
do_negate (block_stmt_iterator *bsi, tree word_type, tree b,
177
           tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
178
           tree bitsize ATTRIBUTE_UNUSED,
179
           enum tree_code code ATTRIBUTE_UNUSED)
180
{
181
  tree inner_type = TREE_TYPE (TREE_TYPE (b));
182
  HOST_WIDE_INT max;
183
  tree low_bits, high_bits, b_low, result_low, signs;
184
 
185
  max = GET_MODE_MASK (TYPE_MODE (inner_type));
186
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
187
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
188
 
189
  b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);
190
 
191
  b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
192
  signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, b);
193
  signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
194
  result_low = gimplify_build2 (bsi, MINUS_EXPR, word_type, high_bits, b_low);
195
  return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
196
}
197
 
198
/* Expand a vector operation to scalars, by using many operations
199
   whose type is the vector type's inner type.  */
200
static tree
201
expand_vector_piecewise (block_stmt_iterator *bsi, elem_op_func f,
202
                         tree type, tree inner_type,
203
                         tree a, tree b, enum tree_code code)
204
{
205
  VEC(constructor_elt,gc) *v;
206
  tree part_width = TYPE_SIZE (inner_type);
207
  tree index = bitsize_int (0);
208
  int nunits = TYPE_VECTOR_SUBPARTS (type);
209
  int delta = tree_low_cst (part_width, 1)
210
              / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
211
  int i;
212
 
213
  v = VEC_alloc(constructor_elt, gc, (nunits + delta - 1) / delta);
214
  for (i = 0; i < nunits;
215
       i += delta, index = int_const_binop (PLUS_EXPR, index, part_width, 0))
216
    {
217
      tree result = f (bsi, inner_type, a, b, index, part_width, code);
218
      constructor_elt *ce = VEC_quick_push (constructor_elt, v, NULL);
219
      ce->index = NULL_TREE;
220
      ce->value = result;
221
    }
222
 
223
  return build_constructor (type, v);
224
}
225
 
226
/* Expand a vector operation to scalars with the freedom to use
227
   a scalar integer type, or to use a different size for the items
228
   in the vector type.  */
229
static tree
230
expand_vector_parallel (block_stmt_iterator *bsi, elem_op_func f, tree type,
231
                        tree a, tree b,
232
                        enum tree_code code)
233
{
234
  tree result, compute_type;
235
  enum machine_mode mode;
236
  int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;
237
 
238
  /* We have three strategies.  If the type is already correct, just do
239
     the operation an element at a time.  Else, if the vector is wider than
240
     one word, do it a word at a time; finally, if the vector is smaller
241
     than one word, do it as a scalar.  */
242
  if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
243
     return expand_vector_piecewise (bsi, f,
244
                                     type, TREE_TYPE (type),
245
                                     a, b, code);
246
  else if (n_words > 1)
247
    {
248
      tree word_type = build_word_mode_vector_type (n_words);
249
      result = expand_vector_piecewise (bsi, f,
250
                                        word_type, TREE_TYPE (word_type),
251
                                        a, b, code);
252
      result = gimplify_val (bsi, word_type, result);
253
    }
254
  else
255
    {
256
      /* Use a single scalar operation with a mode no wider than word_mode.  */
257
      mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
258
      compute_type = lang_hooks.types.type_for_mode (mode, 1);
259
      result = f (bsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
260
    }
261
 
262
  return result;
263
}
264
 
265
/* Expand a vector operation to scalars; for integer types we can use
266
   special bit twiddling tricks to do the sums a word at a time, using
267
   function F_PARALLEL instead of F.  These tricks are done only if
268
   they can process at least four items, that is, only if the vector
269
   holds at least four items and if a word can hold four items.  */
270
static tree
271
expand_vector_addition (block_stmt_iterator *bsi,
272
                        elem_op_func f, elem_op_func f_parallel,
273
                        tree type, tree a, tree b, enum tree_code code)
274
{
275
  int parts_per_word = UNITS_PER_WORD
276
                       / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);
277
 
278
  if (INTEGRAL_TYPE_P (TREE_TYPE (type))
279
      && parts_per_word >= 4
280
      && TYPE_VECTOR_SUBPARTS (type) >= 4)
281
    return expand_vector_parallel (bsi, f_parallel,
282
                                   type, a, b, code);
283
  else
284
    return expand_vector_piecewise (bsi, f,
285
                                    type, TREE_TYPE (type),
286
                                    a, b, code);
287
}
288
 
289
static tree
290
expand_vector_operation (block_stmt_iterator *bsi, tree type, tree compute_type,
291
                         tree rhs, enum tree_code code)
292
{
293
  enum machine_mode compute_mode = TYPE_MODE (compute_type);
294
 
295
  /* If the compute mode is not a vector mode (hence we are not decomposing
296
     a BLKmode vector to smaller, hardware-supported vectors), we may want
297
     to expand the operations in parallel.  */
298
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
299
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT)
300
    switch (code)
301
      {
302
      case PLUS_EXPR:
303
      case MINUS_EXPR:
304
        if (!TYPE_TRAP_SIGNED (type))
305
          return expand_vector_addition (bsi, do_binop, do_plus_minus, type,
306
                                         TREE_OPERAND (rhs, 0),
307
                                         TREE_OPERAND (rhs, 1), code);
308
        break;
309
 
310
      case NEGATE_EXPR:
311
        if (!TYPE_TRAP_SIGNED (type))
312
          return expand_vector_addition (bsi, do_unop, do_negate, type,
313
                                         TREE_OPERAND (rhs, 0),
314
                                         NULL_TREE, code);
315
        break;
316
 
317
      case BIT_AND_EXPR:
318
      case BIT_IOR_EXPR:
319
      case BIT_XOR_EXPR:
320
        return expand_vector_parallel (bsi, do_binop, type,
321
                                       TREE_OPERAND (rhs, 0),
322
                                       TREE_OPERAND (rhs, 1), code);
323
 
324
      case BIT_NOT_EXPR:
325
        return expand_vector_parallel (bsi, do_unop, type,
326
                                       TREE_OPERAND (rhs, 0),
327
                                       NULL_TREE, code);
328
 
329
      default:
330
        break;
331
      }
332
 
333
  if (TREE_CODE_CLASS (code) == tcc_unary)
334
    return expand_vector_piecewise (bsi, do_unop, type, compute_type,
335
                                    TREE_OPERAND (rhs, 0),
336
                                    NULL_TREE, code);
337
  else
338
    return expand_vector_piecewise (bsi, do_binop, type, compute_type,
339
                                    TREE_OPERAND (rhs, 0),
340
                                    TREE_OPERAND (rhs, 1), code);
341
}
342
 
343
/* Return a type for the widest vector mode whose components are of mode
344
   INNER_MODE, or NULL_TREE if none is found.  */
345
static tree
346
type_for_widest_vector_mode (enum machine_mode inner_mode, optab op)
347
{
348
  enum machine_mode best_mode = VOIDmode, mode;
349
  int best_nunits = 0;
350
 
351
  if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
352
    mode = MIN_MODE_VECTOR_FLOAT;
353
  else
354
    mode = MIN_MODE_VECTOR_INT;
355
 
356
  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
357
    if (GET_MODE_INNER (mode) == inner_mode
358
        && GET_MODE_NUNITS (mode) > best_nunits
359
        && op->handlers[mode].insn_code != CODE_FOR_nothing)
360
      best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
361
 
362
  if (best_mode == VOIDmode)
363
    return NULL_TREE;
364
  else
365
    return lang_hooks.types.type_for_mode (best_mode, 1);
366
}
367
 
368
/* Process one statement.  If we identify a vector operation, expand it.  */
369
 
370
static void
371
expand_vector_operations_1 (block_stmt_iterator *bsi)
372
{
373
  tree stmt = bsi_stmt (*bsi);
374
  tree *p_lhs, *p_rhs, lhs, rhs, type, compute_type;
375
  enum tree_code code;
376
  enum machine_mode compute_mode;
377
  optab op;
378
 
379
  switch (TREE_CODE (stmt))
380
    {
381
    case RETURN_EXPR:
382
      stmt = TREE_OPERAND (stmt, 0);
383
      if (!stmt || TREE_CODE (stmt) != MODIFY_EXPR)
384
        return;
385
 
386
      /* FALLTHRU */
387
 
388
    case MODIFY_EXPR:
389
      p_lhs = &TREE_OPERAND (stmt, 0);
390
      p_rhs = &TREE_OPERAND (stmt, 1);
391
      lhs = *p_lhs;
392
      rhs = *p_rhs;
393
      break;
394
 
395
    default:
396
      return;
397
    }
398
 
399
  type = TREE_TYPE (rhs);
400
  if (TREE_CODE (type) != VECTOR_TYPE)
401
    return;
402
 
403
  code = TREE_CODE (rhs);
404
  if (TREE_CODE_CLASS (code) != tcc_unary
405
      && TREE_CODE_CLASS (code) != tcc_binary)
406
    return;
407
 
408
  if (code == NOP_EXPR || code == VIEW_CONVERT_EXPR)
409
    return;
410
 
411
  gcc_assert (code != CONVERT_EXPR);
412
  op = optab_for_tree_code (code, type);
413
 
414
  /* Optabs will try converting a negation into a subtraction, so
415
     look for it as well.  TODO: negation of floating-point vectors
416
     might be turned into an exclusive OR toggling the sign bit.  */
417
  if (op == NULL
418
      && code == NEGATE_EXPR
419
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
420
    op = optab_for_tree_code (MINUS_EXPR, type);
421
 
422
  /* For very wide vectors, try using a smaller vector mode.  */
423
  compute_type = type;
424
  if (TYPE_MODE (type) == BLKmode && op)
425
    {
426
      tree vector_compute_type
427
        = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op);
428
      if (vector_compute_type != NULL_TREE)
429
        compute_type = vector_compute_type;
430
    }
431
 
432
  /* If we are breaking a BLKmode vector into smaller pieces,
433
     type_for_widest_vector_mode has already looked into the optab,
434
     so skip these checks.  */
435
  if (compute_type == type)
436
    {
437
      compute_mode = TYPE_MODE (compute_type);
438
      if ((GET_MODE_CLASS (compute_mode) == MODE_VECTOR_INT
439
           || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FLOAT)
440
          && op != NULL
441
          && op->handlers[compute_mode].insn_code != CODE_FOR_nothing)
442
        return;
443
      else
444
        /* There is no operation in hardware, so fall back to scalars.  */
445
        compute_type = TREE_TYPE (type);
446
    }
447
 
448
  gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
449
  rhs = expand_vector_operation (bsi, type, compute_type, rhs, code);
450
  if (lang_hooks.types_compatible_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
451
    *p_rhs = rhs;
452
  else
453
    *p_rhs = gimplify_build1 (bsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), rhs);
454
 
455
  mark_stmt_modified (bsi_stmt (*bsi));
456
}
457
 
458
/* Use this to lower vector operations introduced by the vectorizer,
459
   if it may need the bit-twiddling tricks implemented in this file.  */
460
 
461
static bool
462
gate_expand_vector_operations (void)
463
{
464
  return flag_tree_vectorize != 0;
465
}
466
 
467
static void
468
expand_vector_operations (void)
469
{
470
  block_stmt_iterator bsi;
471
  basic_block bb;
472
 
473
  FOR_EACH_BB (bb)
474
    {
475
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
476
        {
477
          expand_vector_operations_1 (&bsi);
478
          update_stmt_if_modified (bsi_stmt (bsi));
479
        }
480
    }
481
}
482
 
483
struct tree_opt_pass pass_lower_vector =
484
{
485
  "veclower",                           /* name */
486
  0,                                     /* gate */
487
  expand_vector_operations,             /* execute */
488
  NULL,                                 /* sub */
489
  NULL,                                 /* next */
490
  0,                                     /* static_pass_number */
491
  0,                                     /* tv_id */
492
  PROP_cfg,                             /* properties_required */
493
  0,                                     /* properties_provided */
494
  0,                                     /* properties_destroyed */
495
  0,                                     /* todo_flags_start */
496
  TODO_dump_func | TODO_ggc_collect
497
    | TODO_verify_stmts,                /* todo_flags_finish */
498
 
499
};
500
 
501
struct tree_opt_pass pass_lower_vector_ssa =
502
{
503
  "veclower2",                          /* name */
504
  gate_expand_vector_operations,        /* gate */
505
  expand_vector_operations,             /* execute */
506
  NULL,                                 /* sub */
507
  NULL,                                 /* next */
508
  0,                                     /* static_pass_number */
509
  0,                                     /* tv_id */
510
  PROP_cfg,                             /* properties_required */
511
  0,                                     /* properties_provided */
512
  0,                                     /* properties_destroyed */
513
  0,                                     /* todo_flags_start */
514
  TODO_dump_func | TODO_update_ssa      /* todo_flags_finish */
515
    | TODO_verify_ssa
516
    | TODO_verify_stmts | TODO_verify_flow,
517
 
518
};
519
 
520
#include "gt-tree-vect-generic.h"

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.