OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [gcc/] [tree-vect-patterns.c] - Blame information for rev 731

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 684 jeremybenn
/* Analysis Utilities for Loop Vectorization.
2
   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
3
   Free Software Foundation, Inc.
4
   Contributed by Dorit Nuzman <dorit@il.ibm.com>
5
 
6
This file is part of GCC.
7
 
8
GCC is free software; you can redistribute it and/or modify it under
9
the terms of the GNU General Public License as published by the Free
10
Software Foundation; either version 3, or (at your option) any later
11
version.
12
 
13
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14
WARRANTY; without even the implied warranty of MERCHANTABILITY or
15
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16
for more details.
17
 
18
You should have received a copy of the GNU General Public License
19
along with GCC; see the file COPYING3.  If not see
20
<http://www.gnu.org/licenses/>.  */
21
 
22
#include "config.h"
23
#include "system.h"
24
#include "coretypes.h"
25
#include "tm.h"
26
#include "ggc.h"
27
#include "tree.h"
28
#include "target.h"
29
#include "basic-block.h"
30
#include "gimple-pretty-print.h"
31
#include "tree-flow.h"
32
#include "tree-dump.h"
33
#include "cfgloop.h"
34
#include "expr.h"
35
#include "optabs.h"
36
#include "params.h"
37
#include "tree-data-ref.h"
38
#include "tree-vectorizer.h"
39
#include "recog.h"
40
#include "diagnostic-core.h"
41
 
42
/* Pattern recognition functions  */
43
static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *,
44
                                            tree *);
45
static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *,
46
                                             tree *);
47
static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
48
                                           tree *);
49
static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
50
static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
51
                                                 tree *);
52
static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
53
                                        tree *, tree *);
54
static gimple vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **,
55
                                                      tree *, tree *);
56
static gimple vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **,
57
                                               tree *, tree *);
58
static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
59
                                                  tree *, tree *);
60
static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *);
61
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
62
        vect_recog_widen_mult_pattern,
63
        vect_recog_widen_sum_pattern,
64
        vect_recog_dot_prod_pattern,
65
        vect_recog_pow_pattern,
66
        vect_recog_over_widening_pattern,
67
        vect_recog_widen_shift_pattern,
68
        vect_recog_vector_vector_shift_pattern,
69
        vect_recog_sdivmod_pow2_pattern,
70
        vect_recog_mixed_size_cond_pattern,
71
        vect_recog_bool_pattern};
72
 
73
static inline void
74
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
75
{
76
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
77
                                      stmt);
78
}
79
 
80
static inline void
81
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
82
{
83
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
84
  append_pattern_def_seq (stmt_info, stmt);
85
}
86
 
87
/* Function widened_name_p
88
 
89
   Check whether NAME, an ssa-name used in USE_STMT,
90
   is a result of a type-promotion, such that:
91
     DEF_STMT: NAME = NOP (name0)
92
   where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
93
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
94
   unsigned.  */
95
 
96
static bool
97
widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt,
98
                bool check_sign)
99
{
100
  tree dummy;
101
  gimple dummy_gimple;
102
  loop_vec_info loop_vinfo;
103
  stmt_vec_info stmt_vinfo;
104
  tree type = TREE_TYPE (name);
105
  tree oprnd0;
106
  enum vect_def_type dt;
107
  tree def;
108
 
109
  stmt_vinfo = vinfo_for_stmt (use_stmt);
110
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
111
 
112
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, NULL, def_stmt, &def,
113
                           &dt))
114
    return false;
115
 
116
  if (dt != vect_internal_def
117
      && dt != vect_external_def && dt != vect_constant_def)
118
    return false;
119
 
120
  if (! *def_stmt)
121
    return false;
122
 
123
  if (!is_gimple_assign (*def_stmt))
124
    return false;
125
 
126
  if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR)
127
    return false;
128
 
129
  oprnd0 = gimple_assign_rhs1 (*def_stmt);
130
 
131
  *half_type = TREE_TYPE (oprnd0);
132
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
133
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign)
134
      || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
135
    return false;
136
 
137
  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
138
                           NULL, &dummy_gimple, &dummy, &dt))
139
    return false;
140
 
141
  return true;
142
}
143
 
144
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
145
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
146
 
147
static tree
148
vect_recog_temp_ssa_var (tree type, gimple stmt)
149
{
150
  tree var = create_tmp_var (type, "patt");
151
 
152
  add_referenced_var (var);
153
  var = make_ssa_name (var, stmt);
154
  return var;
155
}
156
 
157
/* Function vect_recog_dot_prod_pattern
158
 
159
   Try to find the following pattern:
160
 
161
     type x_t, y_t;
162
     TYPE1 prod;
163
     TYPE2 sum = init;
164
   loop:
165
     sum_0 = phi <init, sum_1>
166
     S1  x_t = ...
167
     S2  y_t = ...
168
     S3  x_T = (TYPE1) x_t;
169
     S4  y_T = (TYPE1) y_t;
170
     S5  prod = x_T * y_T;
171
     [S6  prod = (TYPE2) prod;  #optional]
172
     S7  sum_1 = prod + sum_0;
173
 
174
   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
175
   same size of 'TYPE1' or bigger. This is a special case of a reduction
176
   computation.
177
 
178
   Input:
179
 
180
   * STMTS: Contains a stmt from which the pattern search begins.  In the
181
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
182
   will be detected.
183
 
184
   Output:
185
 
186
   * TYPE_IN: The type of the input arguments to the pattern.
187
 
188
   * TYPE_OUT: The type of the output  of this pattern.
189
 
190
   * Return value: A new stmt that will be used to replace the sequence of
191
   stmts that constitute the pattern. In this case it will be:
192
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
193
 
194
   Note: The dot-prod idiom is a widening reduction pattern that is
195
         vectorized without preserving all the intermediate results. It
196
         produces only N/2 (widened) results (by summing up pairs of
197
         intermediate results) rather than all N results.  Therefore, we
198
         cannot allow this pattern when we want to get all the results and in
199
         the correct order (as is the case when this computation is in an
200
         inner-loop nested in an outer-loop that us being vectorized).  */
201
 
202
static gimple
203
vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
204
                             tree *type_out)
205
{
206
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
207
  tree oprnd0, oprnd1;
208
  tree oprnd00, oprnd01;
209
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
210
  tree type, half_type;
211
  gimple pattern_stmt;
212
  tree prod_type;
213
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
214
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
215
  tree var;
216
 
217
  if (!is_gimple_assign (last_stmt))
218
    return NULL;
219
 
220
  type = gimple_expr_type (last_stmt);
221
 
222
  /* Look for the following pattern
223
          DX = (TYPE1) X;
224
          DY = (TYPE1) Y;
225
          DPROD = DX * DY;
226
          DDPROD = (TYPE2) DPROD;
227
          sum_1 = DDPROD + sum_0;
228
     In which
229
     - DX is double the size of X
230
     - DY is double the size of Y
231
     - DX, DY, DPROD all have the same type
232
     - sum is the same size of DPROD or bigger
233
     - sum has been recognized as a reduction variable.
234
 
235
     This is equivalent to:
236
       DPROD = X w* Y;          #widen mult
237
       sum_1 = DPROD w+ sum_0;  #widen summation
238
     or
239
       DPROD = X w* Y;          #widen mult
240
       sum_1 = DPROD + sum_0;   #summation
241
   */
242
 
243
  /* Starting from LAST_STMT, follow the defs of its uses in search
244
     of the above pattern.  */
245
 
246
  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
247
    return NULL;
248
 
249
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
250
    {
251
      /* Has been detected as widening-summation?  */
252
 
253
      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
254
      type = gimple_expr_type (stmt);
255
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
256
        return NULL;
257
      oprnd0 = gimple_assign_rhs1 (stmt);
258
      oprnd1 = gimple_assign_rhs2 (stmt);
259
      half_type = TREE_TYPE (oprnd0);
260
    }
261
  else
262
    {
263
      gimple def_stmt;
264
 
265
      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
266
        return NULL;
267
      oprnd0 = gimple_assign_rhs1 (last_stmt);
268
      oprnd1 = gimple_assign_rhs2 (last_stmt);
269
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
270
          || !types_compatible_p (TREE_TYPE (oprnd1), type))
271
        return NULL;
272
      stmt = last_stmt;
273
 
274
      if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
275
        {
276
          stmt = def_stmt;
277
          oprnd0 = gimple_assign_rhs1 (stmt);
278
        }
279
      else
280
        half_type = type;
281
    }
282
 
283
  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
284
     we know that oprnd1 is the reduction variable (defined by a loop-header
285
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
286
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
287
  if (TREE_CODE (oprnd0) != SSA_NAME)
288
    return NULL;
289
 
290
  prod_type = half_type;
291
  stmt = SSA_NAME_DEF_STMT (oprnd0);
292
 
293
  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
294
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
295
    return NULL;
296
 
297
  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
298
     inside the loop (in case we are analyzing an outer-loop).  */
299
  if (!is_gimple_assign (stmt))
300
    return NULL;
301
  stmt_vinfo = vinfo_for_stmt (stmt);
302
  gcc_assert (stmt_vinfo);
303
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
304
    return NULL;
305
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
306
    return NULL;
307
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
308
    {
309
      /* Has been detected as a widening multiplication?  */
310
 
311
      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
312
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
313
        return NULL;
314
      stmt_vinfo = vinfo_for_stmt (stmt);
315
      gcc_assert (stmt_vinfo);
316
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
317
      oprnd00 = gimple_assign_rhs1 (stmt);
318
      oprnd01 = gimple_assign_rhs2 (stmt);
319
    }
320
  else
321
    {
322
      tree half_type0, half_type1;
323
      gimple def_stmt;
324
      tree oprnd0, oprnd1;
325
 
326
      oprnd0 = gimple_assign_rhs1 (stmt);
327
      oprnd1 = gimple_assign_rhs2 (stmt);
328
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
329
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
330
        return NULL;
331
      if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true))
332
        return NULL;
333
      oprnd00 = gimple_assign_rhs1 (def_stmt);
334
      if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true))
335
        return NULL;
336
      oprnd01 = gimple_assign_rhs1 (def_stmt);
337
      if (!types_compatible_p (half_type0, half_type1))
338
        return NULL;
339
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
340
        return NULL;
341
    }
342
 
343
  half_type = TREE_TYPE (oprnd00);
344
  *type_in = half_type;
345
  *type_out = type;
346
 
347
  /* Pattern detected. Create a stmt to be used to replace the pattern: */
348
  var = vect_recog_temp_ssa_var (type, NULL);
349
  pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var,
350
                                                oprnd00, oprnd01, oprnd1);
351
 
352
  if (vect_print_dump_info (REPORT_DETAILS))
353
    {
354
      fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
355
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
356
    }
357
 
358
  /* We don't allow changing the order of the computation in the inner-loop
359
     when doing outer-loop vectorization.  */
360
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
361
 
362
  return pattern_stmt;
363
}
364
 
365
 
366
/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
367
   and LSHIFT_EXPR.
368
 
369
   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
370
   we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
371
 
372
   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
373
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
374
   that satisfies the above restrictions,  we can perform a widening opeartion
375
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
376
   with a_it = (interm_type) a_t;  */
377
 
378
static bool
379
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
380
                               tree const_oprnd, tree *oprnd,
381
                               VEC (gimple, heap) **stmts, tree type,
382
                               tree *half_type, gimple def_stmt)
383
{
384
  tree new_type, new_oprnd, tmp;
385
  gimple new_stmt;
386
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
387
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
388
 
389
  if (code != MULT_EXPR && code != LSHIFT_EXPR)
390
    return false;
391
 
392
  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
393
        || (code == LSHIFT_EXPR
394
            && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
395
                != 1))
396
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
397
    {
398
      /* CONST_OPRND is a constant of HALF_TYPE.  */
399
      *oprnd = gimple_assign_rhs1 (def_stmt);
400
      return true;
401
    }
402
 
403
  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4)
404
      || !gimple_bb (def_stmt)
405
      || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
406
      || !vinfo_for_stmt (def_stmt))
407
    return false;
408
 
409
  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
410
     a type 2 times bigger than HALF_TYPE.  */
411
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
412
                                             TYPE_UNSIGNED (type));
413
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
414
      || (code == LSHIFT_EXPR
415
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
416
    return false;
417
 
418
  /* Use NEW_TYPE for widening operation.  */
419
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
420
    {
421
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
422
      /* Check if the already created pattern stmt is what we need.  */
423
      if (!is_gimple_assign (new_stmt)
424
          || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
425
          || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
426
        return false;
427
 
428
      VEC_safe_push (gimple, heap, *stmts, def_stmt);
429
      *oprnd = gimple_assign_lhs (new_stmt);
430
    }
431
  else
432
    {
433
      /* Create a_T = (NEW_TYPE) a_t;  */
434
      *oprnd = gimple_assign_rhs1 (def_stmt);
435
      tmp = create_tmp_var (new_type, NULL);
436
      add_referenced_var (tmp);
437
      new_oprnd = make_ssa_name (tmp, NULL);
438
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
439
                                               NULL_TREE);
440
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
441
      VEC_safe_push (gimple, heap, *stmts, def_stmt);
442
      *oprnd = new_oprnd;
443
    }
444
 
445
  *half_type = new_type;
446
  return true;
447
}
448
 
449
 
450
/* Function vect_recog_widen_mult_pattern
451
 
452
   Try to find the following pattern:
453
 
454
     type a_t, b_t;
455
     TYPE a_T, b_T, prod_T;
456
 
457
     S1  a_t = ;
458
     S2  b_t = ;
459
     S3  a_T = (TYPE) a_t;
460
     S4  b_T = (TYPE) b_t;
461
     S5  prod_T = a_T * b_T;
462
 
463
   where type 'TYPE' is at least double the size of type 'type'.
464
 
465
   Also detect unsgigned cases:
466
 
467
     unsigned type a_t, b_t;
468
     unsigned TYPE u_prod_T;
469
     TYPE a_T, b_T, prod_T;
470
 
471
     S1  a_t = ;
472
     S2  b_t = ;
473
     S3  a_T = (TYPE) a_t;
474
     S4  b_T = (TYPE) b_t;
475
     S5  prod_T = a_T * b_T;
476
     S6  u_prod_T = (unsigned TYPE) prod_T;
477
 
478
   and multiplication by constants:
479
 
480
     type a_t;
481
     TYPE a_T, prod_T;
482
 
483
     S1  a_t = ;
484
     S3  a_T = (TYPE) a_t;
485
     S5  prod_T = a_T * CONST;
486
 
487
   A special case of multiplication by constants is when 'TYPE' is 4 times
488
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
489
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
490
   to create a variable of the intermediate type, and perform widen-mult
491
   on the intermediate type as well:
492
 
493
     type a_t;
494
     interm_type a_it;
495
     TYPE a_T, prod_T,  prod_T';
496
 
497
     S1  a_t = ;
498
     S3  a_T = (TYPE) a_t;
499
           '--> a_it = (interm_type) a_t;
500
     S5  prod_T = a_T * CONST;
501
           '--> prod_T' = a_it w* CONST;
502
 
503
   Input/Output:
504
 
505
   * STMTS: Contains a stmt from which the pattern search begins.  In the
506
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
507
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
508
   replaced with S6 in STMTS.  In case of multiplication by a constant
509
   of an intermediate type (the last case above), STMTS also contains S3
510
   (inserted before S5).
511
 
512
   Output:
513
 
514
   * TYPE_IN: The type of the input arguments to the pattern.
515
 
516
   * TYPE_OUT: The type of the output of this pattern.
517
 
518
   * Return value: A new stmt that will be used to replace the sequence of
519
   stmts that constitute the pattern.  In this case it will be:
520
        WIDEN_MULT <a_t, b_t>
521
*/
522
 
523
static gimple
524
vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
525
                               tree *type_in, tree *type_out)
526
{
527
  gimple last_stmt = VEC_pop (gimple, *stmts);
528
  gimple def_stmt0, def_stmt1;
529
  tree oprnd0, oprnd1;
530
  tree type, half_type0, half_type1;
531
  gimple pattern_stmt;
532
  tree vectype, vectype_out = NULL_TREE;
533
  tree dummy;
534
  tree var;
535
  enum tree_code dummy_code;
536
  int dummy_int;
537
  VEC (tree, heap) *dummy_vec;
538
  bool op1_ok;
539
 
540
  if (!is_gimple_assign (last_stmt))
541
    return NULL;
542
 
543
  type = gimple_expr_type (last_stmt);
544
 
545
  /* Starting from LAST_STMT, follow the defs of its uses in search
546
     of the above pattern.  */
547
 
548
  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
549
    return NULL;
550
 
551
  oprnd0 = gimple_assign_rhs1 (last_stmt);
552
  oprnd1 = gimple_assign_rhs2 (last_stmt);
553
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
554
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
555
    return NULL;
556
 
557
  /* Check argument 0.  */
558
  if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
559
    return NULL;
560
  /* Check argument 1.  */
561
  op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false);
562
 
563
  if (op1_ok)
564
    {
565
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
566
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
567
    }
568
  else
569
    {
570
      if (TREE_CODE (oprnd1) == INTEGER_CST
571
          && TREE_CODE (half_type0) == INTEGER_TYPE
572
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
573
                                            &oprnd0, stmts, type,
574
                                            &half_type0, def_stmt0))
575
        half_type1 = half_type0;
576
      else
577
        return NULL;
578
    }
579
 
580
  /* Handle unsigned case.  Look for
581
     S6  u_prod_T = (unsigned TYPE) prod_T;
582
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
583
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
584
    {
585
      tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
586
      imm_use_iterator imm_iter;
587
      use_operand_p use_p;
588
      int nuses = 0;
589
      gimple use_stmt = NULL;
590
      tree use_type;
591
 
592
      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
593
        return NULL;
594
 
595
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
596
        {
597
          if (is_gimple_debug (USE_STMT (use_p)))
598
            continue;
599
          use_stmt = USE_STMT (use_p);
600
          nuses++;
601
        }
602
 
603
      if (nuses != 1 || !is_gimple_assign (use_stmt)
604
          || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
605
        return NULL;
606
 
607
      use_lhs = gimple_assign_lhs (use_stmt);
608
      use_type = TREE_TYPE (use_lhs);
609
      if (!INTEGRAL_TYPE_P (use_type)
610
          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
611
          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
612
        return NULL;
613
 
614
      type = use_type;
615
      last_stmt = use_stmt;
616
    }
617
 
618
  if (!types_compatible_p (half_type0, half_type1))
619
    return NULL;
620
 
621
  /* Pattern detected.  */
622
  if (vect_print_dump_info (REPORT_DETAILS))
623
    fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");
624
 
625
  /* Check target support  */
626
  vectype = get_vectype_for_scalar_type (half_type0);
627
  vectype_out = get_vectype_for_scalar_type (type);
628
  if (!vectype
629
      || !vectype_out
630
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
631
                                          vectype_out, vectype,
632
                                          &dummy, &dummy, &dummy_code,
633
                                          &dummy_code, &dummy_int, &dummy_vec))
634
    return NULL;
635
 
636
  *type_in = vectype;
637
  *type_out = vectype_out;
638
 
639
  /* Pattern supported. Create a stmt to be used to replace the pattern: */
640
  var = vect_recog_temp_ssa_var (type, NULL);
641
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
642
                                               oprnd1);
643
 
644
  if (vect_print_dump_info (REPORT_DETAILS))
645
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
646
 
647
  VEC_safe_push (gimple, heap, *stmts, last_stmt);
648
  return pattern_stmt;
649
}
650
 
651
 
652
/* Function vect_recog_pow_pattern
653
 
654
   Try to find the following pattern:
655
 
656
     x = POW (y, N);
657
 
658
   with POW being one of pow, powf, powi, powif and N being
659
   either 2 or 0.5.
660
 
661
   Input:
662
 
663
   * LAST_STMT: A stmt from which the pattern search begins.
664
 
665
   Output:
666
 
667
   * TYPE_IN: The type of the input arguments to the pattern.
668
 
669
   * TYPE_OUT: The type of the output of this pattern.
670
 
671
   * Return value: A new stmt that will be used to replace the sequence of
672
   stmts that constitute the pattern. In this case it will be:
673
        x = x * x
674
   or
675
        x = sqrt (x)
676
*/
677
 
678
static gimple
679
vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in,
680
                        tree *type_out)
681
{
682
  gimple last_stmt = VEC_index (gimple, *stmts, 0);
683
  tree fn, base, exp = NULL;
684
  gimple stmt;
685
  tree var;
686
 
687
  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
688
    return NULL;
689
 
690
  fn = gimple_call_fndecl (last_stmt);
691
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
692
   return NULL;
693
 
694
  switch (DECL_FUNCTION_CODE (fn))
695
    {
696
    case BUILT_IN_POWIF:
697
    case BUILT_IN_POWI:
698
    case BUILT_IN_POWF:
699
    case BUILT_IN_POW:
700
      base = gimple_call_arg (last_stmt, 0);
701
      exp = gimple_call_arg (last_stmt, 1);
702
      if (TREE_CODE (exp) != REAL_CST
703
          && TREE_CODE (exp) != INTEGER_CST)
704
        return NULL;
705
      break;
706
 
707
    default:
708
      return NULL;
709
    }
710
 
711
  /* We now have a pow or powi builtin function call with a constant
712
     exponent.  */
713
 
714
  *type_out = NULL_TREE;
715
 
716
  /* Catch squaring.  */
717
  if ((host_integerp (exp, 0)
718
       && tree_low_cst (exp, 0) == 2)
719
      || (TREE_CODE (exp) == REAL_CST
720
          && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
721
    {
722
      *type_in = TREE_TYPE (base);
723
 
724
      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
725
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
726
      return stmt;
727
    }
728
 
729
  /* Catch square root.  */
730
  if (TREE_CODE (exp) == REAL_CST
731
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
732
    {
733
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
734
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
735
      if (*type_in)
736
        {
737
          gimple stmt = gimple_build_call (newfn, 1, base);
738
          if (vectorizable_function (stmt, *type_in, *type_in)
739
              != NULL_TREE)
740
            {
741
              var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
742
              gimple_call_set_lhs (stmt, var);
743
              return stmt;
744
            }
745
        }
746
    }
747
 
748
  return NULL;
749
}
750
 
751
 
752
/* Function vect_recog_widen_sum_pattern
753
 
754
   Try to find the following pattern:
755
 
756
     type x_t;
757
     TYPE x_T, sum = init;
758
   loop:
759
     sum_0 = phi <init, sum_1>
760
     S1  x_t = *p;
761
     S2  x_T = (TYPE) x_t;
762
     S3  sum_1 = x_T + sum_0;
763
 
764
   where type 'TYPE' is at least double the size of type 'type', i.e - we're
765
   summing elements of type 'type' into an accumulator of type 'TYPE'. This is
766
   a special case of a reduction computation.
767
 
768
   Input:
769
 
770
   * LAST_STMT: A stmt from which the pattern search begins. In the example,
771
   when this function is called with S3, the pattern {S2,S3} will be detected.
772
 
773
   Output:
774
 
775
   * TYPE_IN: The type of the input arguments to the pattern.
776
 
777
   * TYPE_OUT: The type of the output of this pattern.
778
 
779
   * Return value: A new stmt that will be used to replace the sequence of
780
   stmts that constitute the pattern. In this case it will be:
781
        WIDEN_SUM <x_t, sum_0>
782
 
783
   Note: The widening-sum idiom is a widening reduction pattern that is
784
         vectorized without preserving all the intermediate results. It
785
         produces only N/2 (widened) results (by summing up pairs of
786
         intermediate results) rather than all N results.  Therefore, we
787
         cannot allow this pattern when we want to get all the results and in
788
         the correct order (as is the case when this computation is in an
789
         inner-loop nested in an outer-loop that us being vectorized).  */
790
 
791
static gimple
792
vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
793
                              tree *type_out)
794
{
795
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
796
  tree oprnd0, oprnd1;
797
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
798
  tree type, half_type;
799
  gimple pattern_stmt;
800
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
801
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
802
  tree var;
803
 
804
  if (!is_gimple_assign (last_stmt))
805
    return NULL;
806
 
807
  type = gimple_expr_type (last_stmt);
808
 
809
  /* Look for the following pattern
810
          DX = (TYPE) X;
811
          sum_1 = DX + sum_0;
812
     In which DX is at least double the size of X, and sum_1 has been
813
     recognized as a reduction variable.
814
   */
815
 
816
  /* Starting from LAST_STMT, follow the defs of its uses in search
817
     of the above pattern.  */
818
 
819
  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
820
    return NULL;
821
 
822
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
823
    return NULL;
824
 
825
  oprnd0 = gimple_assign_rhs1 (last_stmt);
826
  oprnd1 = gimple_assign_rhs2 (last_stmt);
827
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
828
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
829
    return NULL;
830
 
831
  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
832
     we know that oprnd1 is the reduction variable (defined by a loop-header
833
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
834
     Left to check that oprnd0 is defined by a cast from type 'type' to type
835
     'TYPE'.  */
836
 
837
  if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true))
838
    return NULL;
839
 
840
  oprnd0 = gimple_assign_rhs1 (stmt);
841
  *type_in = half_type;
842
  *type_out = type;
843
 
844
  /* Pattern detected. Create a stmt to be used to replace the pattern: */
845
  var = vect_recog_temp_ssa_var (type, NULL);
846
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
847
                                               oprnd0, oprnd1);
848
 
849
  if (vect_print_dump_info (REPORT_DETAILS))
850
    {
851
      fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
852
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
853
    }
854
 
855
  /* We don't allow changing the order of the computation in the inner-loop
856
     when doing outer-loop vectorization.  */
857
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
858
 
859
  return pattern_stmt;
860
}
861
 
862
 
863
/* Return TRUE if the operation in STMT can be performed on a smaller type.
864
 
865
   Input:
866
   STMT - a statement to check.
867
   DEF - we support operations with two operands, one of which is constant.
868
         The other operand can be defined by a demotion operation, or by a
869
         previous statement in a sequence of over-promoted operations.  In the
870
         later case DEF is used to replace that operand.  (It is defined by a
871
         pattern statement we created for the previous statement in the
872
         sequence).
873
 
874
   Input/output:
875
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
876
         NULL, it's the type of DEF.
877
   STMTS - additional pattern statements.  If a pattern statement (type
878
         conversion) is created in this function, its original statement is
879
         added to STMTS.
880
 
881
   Output:
882
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
883
         operands to use in the new pattern statement for STMT (will be created
884
         in vect_recog_over_widening_pattern ()).
885
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
886
         statements for STMT: the first one is a type promotion and the second
887
         one is the operation itself.  We return the type promotion statement
888
         in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
889
         the second pattern statement.  */
890
 
891
static bool
892
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
893
                                  tree *op0, tree *op1, gimple *new_def_stmt,
894
                                  VEC (gimple, heap) **stmts)
895
{
896
  enum tree_code code;
897
  tree const_oprnd, oprnd;
898
  tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
899
  gimple def_stmt, new_stmt;
900
  bool first = false;
901
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
902
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
903
 
904
  *op0 = NULL_TREE;
905
  *op1 = NULL_TREE;
906
  *new_def_stmt = NULL;
907
 
908
  if (!is_gimple_assign (stmt))
909
    return false;
910
 
911
  code = gimple_assign_rhs_code (stmt);
912
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
913
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
914
    return false;
915
 
916
  oprnd = gimple_assign_rhs1 (stmt);
917
  const_oprnd = gimple_assign_rhs2 (stmt);
918
  type = gimple_expr_type (stmt);
919
 
920
  if (TREE_CODE (oprnd) != SSA_NAME
921
      || TREE_CODE (const_oprnd) != INTEGER_CST)
922
    return false;
923
 
924
  /* If we are in the middle of a sequence, we use DEF from a previous
925
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
926
  if (*new_type)
927
    {
928
      half_type = *new_type;
929
      oprnd = def;
930
    }
931
  else
932
    {
933
      first = true;
934
      if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false)
935
          || !gimple_bb (def_stmt)
936
          || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
937
          || !vinfo_for_stmt (def_stmt))
938
        return false;
939
    }
940
 
941
  /* Can we perform the operation on a smaller type?  */
942
  switch (code)
943
    {
944
      case BIT_IOR_EXPR:
945
      case BIT_XOR_EXPR:
946
      case BIT_AND_EXPR:
947
        if (!int_fits_type_p (const_oprnd, half_type))
948
          {
949
            /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
950
            if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
951
              return false;
952
 
953
            interm_type = build_nonstandard_integer_type (
954
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
955
            if (!int_fits_type_p (const_oprnd, interm_type))
956
              return false;
957
          }
958
 
959
        break;
960
 
961
      case LSHIFT_EXPR:
962
        /* Try intermediate type - HALF_TYPE is not enough for sure.  */
963
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
964
          return false;
965
 
966
        /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
967
          (e.g., if the original value was char, the shift amount is at most 8
968
           if we want to use short).  */
969
        if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
970
          return false;
971
 
972
        interm_type = build_nonstandard_integer_type (
973
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
974
 
975
        if (!vect_supportable_shift (code, interm_type))
976
          return false;
977
 
978
        break;
979
 
980
      case RSHIFT_EXPR:
981
        if (vect_supportable_shift (code, half_type))
982
          break;
983
 
984
        /* Try intermediate type - HALF_TYPE is not supported.  */
985
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
986
          return false;
987
 
988
        interm_type = build_nonstandard_integer_type (
989
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
990
 
991
        if (!vect_supportable_shift (code, interm_type))
992
          return false;
993
 
994
        break;
995
 
996
      default:
997
        gcc_unreachable ();
998
    }
999
 
1000
  /* There are four possible cases:
1001
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1002
        the first statement in the sequence)
1003
        a. The original, HALF_TYPE, is not enough - we replace the promotion
1004
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1005
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1006
           promotion.
1007
     2. OPRND is defined by a pattern statement we created.
1008
        a. Its type is not sufficient for the operation, we create a new stmt:
1009
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
1010
           this statement in NEW_DEF_STMT, and it is later put in
1011
           STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1012
        b. OPRND is good to use in the new statement.  */
1013
  if (first)
1014
    {
1015
      if (interm_type)
1016
        {
1017
          /* Replace the original type conversion HALF_TYPE->TYPE with
1018
             HALF_TYPE->INTERM_TYPE.  */
1019
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1020
            {
1021
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1022
              /* Check if the already created pattern stmt is what we need.  */
1023
              if (!is_gimple_assign (new_stmt)
1024
                  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
1025
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1026
                return false;
1027
 
1028
              VEC_safe_push (gimple, heap, *stmts, def_stmt);
1029
              oprnd = gimple_assign_lhs (new_stmt);
1030
            }
1031
          else
1032
            {
1033
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
1034
              oprnd = gimple_assign_rhs1 (def_stmt);
1035
              tmp = create_tmp_reg (interm_type, NULL);
1036
              add_referenced_var (tmp);
1037
              new_oprnd = make_ssa_name (tmp, NULL);
1038
              new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1039
                                                       oprnd, NULL_TREE);
1040
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1041
              VEC_safe_push (gimple, heap, *stmts, def_stmt);
1042
              oprnd = new_oprnd;
1043
            }
1044
        }
1045
      else
1046
        {
1047
          /* Retrieve the operand before the type promotion.  */
1048
          oprnd = gimple_assign_rhs1 (def_stmt);
1049
        }
1050
    }
1051
  else
1052
    {
1053
      if (interm_type)
1054
        {
1055
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
1056
          tmp = create_tmp_reg (interm_type, NULL);
1057
          add_referenced_var (tmp);
1058
          new_oprnd = make_ssa_name (tmp, NULL);
1059
          new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1060
                                                   oprnd, NULL_TREE);
1061
          oprnd = new_oprnd;
1062
          *new_def_stmt = new_stmt;
1063
        }
1064
 
1065
      /* Otherwise, OPRND is already set.  */
1066
    }
1067
 
1068
  if (interm_type)
1069
    *new_type = interm_type;
1070
  else
1071
    *new_type = half_type;
1072
 
1073
  *op0 = oprnd;
1074
  *op1 = fold_convert (*new_type, const_oprnd);
1075
 
1076
  return true;
1077
}
1078
 
1079
 
1080
/* Try to find a statement or a sequence of statements that can be performed
1081
   on a smaller type:
1082
 
1083
     type x_t;
1084
     TYPE x_T, res0_T, res1_T;
1085
   loop:
1086
     S1  x_t = *p;
1087
     S2  x_T = (TYPE) x_t;
1088
     S3  res0_T = op (x_T, C0);
1089
     S4  res1_T = op (res0_T, C1);
1090
     S5  ... = () res1_T;  - type demotion
1091
 
1092
   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1093
   constants.
1094
   Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1095
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
1096
   demotion operation.  We also check that S3 and S4 have only one use.  */
1097
 
1098
static gimple
1099
vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
1100
                                  tree *type_in, tree *type_out)
1101
{
1102
  gimple stmt = VEC_pop (gimple, *stmts);
1103
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
1104
  tree op0, op1, vectype = NULL_TREE, lhs, use_lhs, use_type;
1105
  imm_use_iterator imm_iter;
1106
  use_operand_p use_p;
1107
  int nuses = 0;
1108
  tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
1109
  bool first;
1110
  struct loop *loop = (gimple_bb (stmt))->loop_father;
1111
  tree type = NULL;
1112
 
1113
  first = true;
1114
  while (1)
1115
    {
1116
      if (!vinfo_for_stmt (stmt)
1117
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1118
        return NULL;
1119
 
1120
      new_def_stmt = NULL;
1121
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1122
                                             &op0, &op1, &new_def_stmt,
1123
                                             stmts))
1124
        {
1125
          if (first)
1126
            return NULL;
1127
          else
1128
            break;
1129
        }
1130
 
1131
      /* STMT can be performed on a smaller type.  Check its uses.  */
1132
      lhs = gimple_assign_lhs (stmt);
1133
      nuses = 0;
1134
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
1135
        {
1136
          if (is_gimple_debug (USE_STMT (use_p)))
1137
            continue;
1138
          use_stmt = USE_STMT (use_p);
1139
          nuses++;
1140
        }
1141
 
1142
      if (nuses != 1 || !is_gimple_assign (use_stmt)
1143
          || !gimple_bb (use_stmt)
1144
          || !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1145
        return NULL;
1146
 
1147
      /* Create pattern statement for STMT.  */
1148
      vectype = get_vectype_for_scalar_type (new_type);
1149
      if (!vectype)
1150
        return NULL;
1151
 
1152
      /* We want to collect all the statements for which we create pattern
1153
         statetments, except for the case when the last statement in the
1154
         sequence doesn't have a corresponding pattern statement.  In such
1155
         case we associate the last pattern statement with the last statement
1156
         in the sequence.  Therefore, we only add the original statement to
1157
         the list if we know that it is not the last.  */
1158
      if (prev_stmt)
1159
        VEC_safe_push (gimple, heap, *stmts, prev_stmt);
1160
 
1161
      var = vect_recog_temp_ssa_var (new_type, NULL);
1162
      pattern_stmt
1163
        = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
1164
                                        op0, op1);
1165
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1166
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1167
 
1168
      if (vect_print_dump_info (REPORT_DETAILS))
1169
        {
1170
          fprintf (vect_dump, "created pattern stmt: ");
1171
          print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1172
        }
1173
 
1174
      type = gimple_expr_type (stmt);
1175
      prev_stmt = stmt;
1176
      stmt = use_stmt;
1177
 
1178
      first = false;
1179
    }
1180
 
1181
  /* We got a sequence.  We expect it to end with a type demotion operation.
1182
     Otherwise, we quit (for now).  There are three possible cases: the
1183
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
1184
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1185
     NEW_TYPE differs (we create a new conversion statement).  */
1186
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1187
    {
1188
      use_lhs = gimple_assign_lhs (use_stmt);
1189
      use_type = TREE_TYPE (use_lhs);
1190
      /* Support only type demotion or signedess change.  */
1191
      if (!INTEGRAL_TYPE_P (use_type)
1192
          || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1193
        return NULL;
1194
 
1195
      /* Check that NEW_TYPE is not bigger than the conversion result.  */
1196
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1197
        return NULL;
1198
 
1199
      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1200
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1201
        {
1202
          /* Create NEW_TYPE->USE_TYPE conversion.  */
1203
          tmp = create_tmp_reg (use_type, NULL);
1204
          add_referenced_var (tmp);
1205
          new_oprnd = make_ssa_name (tmp, NULL);
1206
          pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1207
                                                       var, NULL_TREE);
1208
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1209
 
1210
          *type_in = get_vectype_for_scalar_type (new_type);
1211
          *type_out = get_vectype_for_scalar_type (use_type);
1212
 
1213
          /* We created a pattern statement for the last statement in the
1214
             sequence, so we don't need to associate it with the pattern
1215
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
1216
             to the list in order to mark it later in vect_pattern_recog_1.  */
1217
          if (prev_stmt)
1218
            VEC_safe_push (gimple, heap, *stmts, prev_stmt);
1219
        }
1220
      else
1221
        {
1222
          if (prev_stmt)
1223
            STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1224
               = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1225
 
1226
          *type_in = vectype;
1227
          *type_out = NULL_TREE;
1228
        }
1229
 
1230
      VEC_safe_push (gimple, heap, *stmts, use_stmt);
1231
    }
1232
  else
1233
    /* TODO: support general case, create a conversion to the correct type.  */
1234
    return NULL;
1235
 
1236
  /* Pattern detected.  */
1237
  if (vect_print_dump_info (REPORT_DETAILS))
1238
    {
1239
      fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
1240
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1241
    }
1242
 
1243
  return pattern_stmt;
1244
}
1245
 
1246
/* Detect widening shift pattern:
1247
 
1248
   type a_t;
1249
   TYPE a_T, res_T;
1250
 
1251
   S1 a_t = ;
1252
   S2 a_T = (TYPE) a_t;
1253
   S3 res_T = a_T << CONST;
1254
 
1255
  where type 'TYPE' is at least double the size of type 'type'.
1256
 
1257
  Also detect unsigned cases:
1258
 
1259
  unsigned type a_t;
1260
  unsigned TYPE u_res_T;
1261
  TYPE a_T, res_T;
1262
 
1263
  S1 a_t = ;
1264
  S2 a_T = (TYPE) a_t;
1265
  S3 res_T = a_T << CONST;
1266
  S4 u_res_T = (unsigned TYPE) res_T;
1267
 
1268
  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
1269
  create an additional pattern stmt for S2 to create a variable of an
1270
  intermediate type, and perform widen-shift on the intermediate type:
1271
 
1272
  type a_t;
1273
  interm_type a_it;
1274
  TYPE a_T, res_T, res_T';
1275
 
1276
  S1 a_t = ;
1277
  S2 a_T = (TYPE) a_t;
1278
      '--> a_it = (interm_type) a_t;
1279
  S3 res_T = a_T << CONST;
1280
      '--> res_T' = a_it <<* CONST;
1281
 
1282
  Input/Output:
1283
 
1284
  * STMTS: Contains a stmt from which the pattern search begins.
1285
    In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1286
    in STMTS.  When an intermediate type is used and a pattern statement is
1287
    created for S2, we also put S2 here (before S3).
1288
 
1289
  Output:
1290
 
1291
  * TYPE_IN: The type of the input arguments to the pattern.
1292
 
1293
  * TYPE_OUT: The type of the output of this pattern.
1294
 
1295
  * Return value: A new stmt that will be used to replace the sequence of
1296
    stmts that constitute the pattern.  In this case it will be:
1297
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
1298
 
1299
static gimple
1300
vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts,
1301
                                tree *type_in, tree *type_out)
1302
{
1303
  gimple last_stmt = VEC_pop (gimple, *stmts);
1304
  gimple def_stmt0;
1305
  tree oprnd0, oprnd1;
1306
  tree type, half_type0;
1307
  gimple pattern_stmt, orig_stmt = NULL;
1308
  tree vectype, vectype_out = NULL_TREE;
1309
  tree dummy;
1310
  tree var;
1311
  enum tree_code dummy_code;
1312
  int dummy_int;
1313
  VEC (tree, heap) * dummy_vec;
1314
  gimple use_stmt = NULL;
1315
  bool over_widen = false;
1316
 
1317
  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1318
    return NULL;
1319
 
1320
  orig_stmt = last_stmt;
1321
  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1322
    {
1323
      /* This statement was also detected as over-widening operation (it can't
1324
         be any other pattern, because only over-widening detects shifts).
1325
         LAST_STMT is the final type demotion statement, but its related
1326
         statement is shift.  We analyze the related statement to catch cases:
1327
 
1328
         orig code:
1329
          type a_t;
1330
          itype res;
1331
          TYPE a_T, res_T;
1332
 
1333
          S1 a_T = (TYPE) a_t;
1334
          S2 res_T = a_T << CONST;
1335
          S3 res = (itype)res_T;
1336
 
1337
          (size of type * 2 <= size of itype
1338
           and size of itype * 2 <= size of TYPE)
1339
 
1340
         code after over-widening pattern detection:
1341
 
1342
          S1 a_T = (TYPE) a_t;
1343
               --> a_it = (itype) a_t;
1344
          S2 res_T = a_T << CONST;
1345
          S3 res = (itype)res_T;  <--- LAST_STMT
1346
               --> res = a_it << CONST;
1347
 
1348
         after widen_shift:
1349
 
1350
          S1 a_T = (TYPE) a_t;
1351
               --> a_it = (itype) a_t; - redundant
1352
          S2 res_T = a_T << CONST;
1353
          S3 res = (itype)res_T;
1354
               --> res = a_t w<< CONST;
1355
 
1356
      i.e., we replace the three statements with res = a_t w<< CONST.  */
1357
      last_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_stmt));
1358
      over_widen = true;
1359
    }
1360
 
1361
  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1362
    return NULL;
1363
 
1364
  oprnd0 = gimple_assign_rhs1 (last_stmt);
1365
  oprnd1 = gimple_assign_rhs2 (last_stmt);
1366
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1367
    return NULL;
1368
 
1369
  /* Check operand 0: it has to be defined by a type promotion.  */
1370
  if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
1371
    return NULL;
1372
 
1373
  /* Check operand 1: has to be positive.  We check that it fits the type
1374
     in vect_handle_widen_op_by_const ().  */
1375
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1376
    return NULL;
1377
 
1378
  oprnd0 = gimple_assign_rhs1 (def_stmt0);
1379
  type = gimple_expr_type (last_stmt);
1380
 
1381
  /* Check if this a widening operation.  */
1382
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1383
                                      &oprnd0, stmts,
1384
                                      type, &half_type0, def_stmt0))
1385
    return NULL;
1386
 
1387
  /* Handle unsigned case.  Look for
1388
     S4  u_res_T = (unsigned TYPE) res_T;
1389
     Use unsigned TYPE as the type for WIDEN_LSHIFT_EXPR.  */
1390
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
1391
    {
1392
      tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
1393
      imm_use_iterator imm_iter;
1394
      use_operand_p use_p;
1395
      int nuses = 0;
1396
      tree use_type;
1397
 
1398
      if (over_widen)
1399
        {
1400
          /* In case of over-widening pattern, S4 should be ORIG_STMT itself.
1401
             We check here that TYPE is the correct type for the operation,
1402
             i.e., it's the type of the original result.  */
1403
          tree orig_type = gimple_expr_type (orig_stmt);
1404
          if ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (orig_type))
1405
              || (TYPE_PRECISION (type) != TYPE_PRECISION (orig_type)))
1406
            return NULL;
1407
        }
1408
      else
1409
        {
1410
          FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
1411
            {
1412
              if (is_gimple_debug (USE_STMT (use_p)))
1413
                continue;
1414
              use_stmt = USE_STMT (use_p);
1415
              nuses++;
1416
            }
1417
 
1418
          if (nuses != 1 || !is_gimple_assign (use_stmt)
1419
              || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1420
            return NULL;
1421
 
1422
          use_lhs = gimple_assign_lhs (use_stmt);
1423
          use_type = TREE_TYPE (use_lhs);
1424
 
1425
          if (!INTEGRAL_TYPE_P (use_type)
1426
              || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
1427
              || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
1428
            return NULL;
1429
 
1430
          type = use_type;
1431
        }
1432
    }
1433
 
1434
  /* Pattern detected.  */
1435
  if (vect_print_dump_info (REPORT_DETAILS))
1436
    fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: ");
1437
 
1438
  /* Check target support.  */
1439
  vectype = get_vectype_for_scalar_type (half_type0);
1440
  vectype_out = get_vectype_for_scalar_type (type);
1441
 
1442
  if (!vectype
1443
      || !vectype_out
1444
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1445
                                          vectype_out, vectype,
1446
                                          &dummy, &dummy, &dummy_code,
1447
                                          &dummy_code, &dummy_int,
1448
                                          &dummy_vec))
1449
    return NULL;
1450
 
1451
  *type_in = vectype;
1452
  *type_out = vectype_out;
1453
 
1454
  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
1455
  var = vect_recog_temp_ssa_var (type, NULL);
1456
  pattern_stmt =
1457
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);
1458
 
1459
  if (vect_print_dump_info (REPORT_DETAILS))
1460
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1461
 
1462
  if (use_stmt)
1463
    last_stmt = use_stmt;
1464
  else
1465
    last_stmt = orig_stmt;
1466
 
1467
  VEC_safe_push (gimple, heap, *stmts, last_stmt);
1468
  return pattern_stmt;
1469
}
1470
 
1471
/* Detect a vector by vector shift pattern that wouldn't be otherwise
1472
   vectorized:
1473
 
1474
   type a_t;
1475
   TYPE b_T, res_T;
1476
 
1477
   S1 a_t = ;
1478
   S2 b_T = ;
1479
   S3 res_T = b_T op a_t;
1480
 
1481
  where type 'TYPE' is a type with different size than 'type',
1482
  and op is <<, >> or rotate.
1483
 
1484
  Also detect cases:
1485
 
1486
   type a_t;
1487
   TYPE b_T, c_T, res_T;
1488
 
1489
   S0 c_T = ;
1490
   S1 a_t = (type) c_T;
1491
   S2 b_T = ;
1492
   S3 res_T = b_T op a_t;
1493
 
1494
  Input/Output:
1495
 
1496
  * STMTS: Contains a stmt from which the pattern search begins,
1497
    i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
1498
    with a shift/rotate which has same type on both operands, in the
1499
    second case just b_T op c_T, in the first case with added cast
1500
    from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
1501
 
1502
  Output:
1503
 
1504
  * TYPE_IN: The type of the input arguments to the pattern.
1505
 
1506
  * TYPE_OUT: The type of the output of this pattern.
1507
 
1508
  * Return value: A new stmt that will be used to replace the shift/rotate
1509
    S3 stmt.  */
1510
 
1511
static gimple
1512
vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts,
1513
                                        tree *type_in, tree *type_out)
1514
{
1515
  gimple last_stmt = VEC_pop (gimple, *stmts);
1516
  tree oprnd0, oprnd1, lhs, var;
1517
  gimple pattern_stmt, def_stmt;
1518
  enum tree_code rhs_code;
1519
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1520
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1521
  enum vect_def_type dt;
1522
  tree def;
1523
 
1524
  if (!is_gimple_assign (last_stmt))
1525
    return NULL;
1526
 
1527
  rhs_code = gimple_assign_rhs_code (last_stmt);
1528
  switch (rhs_code)
1529
    {
1530
    case LSHIFT_EXPR:
1531
    case RSHIFT_EXPR:
1532
    case LROTATE_EXPR:
1533
    case RROTATE_EXPR:
1534
      break;
1535
    default:
1536
      return NULL;
1537
    }
1538
 
1539
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1540
    return NULL;
1541
 
1542
  lhs = gimple_assign_lhs (last_stmt);
1543
  oprnd0 = gimple_assign_rhs1 (last_stmt);
1544
  oprnd1 = gimple_assign_rhs2 (last_stmt);
1545
  if (TREE_CODE (oprnd0) != SSA_NAME
1546
      || TREE_CODE (oprnd1) != SSA_NAME
1547
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
1548
      || TYPE_PRECISION (TREE_TYPE (oprnd1))
1549
         != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
1550
      || TYPE_PRECISION (TREE_TYPE (lhs))
1551
         != TYPE_PRECISION (TREE_TYPE (oprnd0)))
1552
    return NULL;
1553
 
1554
  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, NULL, &def_stmt,
1555
                           &def, &dt))
1556
    return NULL;
1557
 
1558
  if (dt != vect_internal_def)
1559
    return NULL;
1560
 
1561
  *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
1562
  *type_out = *type_in;
1563
  if (*type_in == NULL_TREE)
1564
    return NULL;
1565
 
1566
  def = NULL_TREE;
1567
  if (gimple_assign_cast_p (def_stmt))
1568
    {
1569
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
1570
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
1571
          && TYPE_PRECISION (TREE_TYPE (rhs1))
1572
             == TYPE_PRECISION (TREE_TYPE (oprnd0)))
1573
        def = rhs1;
1574
    }
1575
 
1576
  if (def == NULL_TREE)
1577
    {
1578
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1579
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
1580
                                               NULL_TREE);
1581
      new_pattern_def_seq (stmt_vinfo, def_stmt);
1582
    }
1583
 
1584
  /* Pattern detected.  */
1585
  if (vect_print_dump_info (REPORT_DETAILS))
1586
    fprintf (vect_dump, "vect_recog_vector_vector_shift_pattern: detected: ");
1587
 
1588
  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
1589
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1590
  pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);
1591
 
1592
  if (vect_print_dump_info (REPORT_DETAILS))
1593
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1594
 
1595
  VEC_safe_push (gimple, heap, *stmts, last_stmt);
1596
  return pattern_stmt;
1597
}
1598
 
1599
/* Detect a signed division by power of two constant that wouldn't be
1600
   otherwise vectorized:
1601
 
1602
   type a_t, b_t;
1603
 
1604
   S1 a_t = b_t / N;
1605
 
1606
  where type 'type' is a signed integral type and N is a constant positive
1607
  power of two.
1608
 
1609
  Similarly handle signed modulo by power of two constant:
1610
 
1611
   S4 a_t = b_t % N;
1612
 
1613
  Input/Output:
1614
 
1615
  * STMTS: Contains a stmt from which the pattern search begins,
1616
    i.e. the division stmt.  S1 is replaced by:
1617
  S3  y_t = b_t < 0 ? N - 1 : 0;
1618
  S2  x_t = b_t + y_t;
1619
  S1' a_t = x_t >> log2 (N);
1620
 
1621
    S4 is replaced by (where *_T temporaries have unsigned type):
1622
  S9  y_T = b_t < 0 ? -1U : 0U;
1623
  S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
1624
  S7  z_t = (type) z_T;
1625
  S6  w_t = b_t + z_t;
1626
  S5  x_t = w_t & (N - 1);
1627
  S4' a_t = x_t - z_t;
1628
 
1629
  Output:
1630
 
1631
  * TYPE_IN: The type of the input arguments to the pattern.
1632
 
1633
  * TYPE_OUT: The type of the output of this pattern.
1634
 
1635
  * Return value: A new stmt that will be used to replace the division
1636
    S1 or modulo S4 stmt.  */
1637
 
1638
static gimple
1639
vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
1640
                                 tree *type_in, tree *type_out)
1641
{
1642
  gimple last_stmt = VEC_pop (gimple, *stmts);
1643
  tree oprnd0, oprnd1, vectype, itype, cond;
1644
  gimple pattern_stmt, def_stmt;
1645
  enum tree_code rhs_code;
1646
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1647
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1648
  optab optab;
1649
 
1650
  if (!is_gimple_assign (last_stmt))
1651
    return NULL;
1652
 
1653
  rhs_code = gimple_assign_rhs_code (last_stmt);
1654
  switch (rhs_code)
1655
    {
1656
    case TRUNC_DIV_EXPR:
1657
    case TRUNC_MOD_EXPR:
1658
      break;
1659
    default:
1660
      return NULL;
1661
    }
1662
 
1663
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1664
    return NULL;
1665
 
1666
  oprnd0 = gimple_assign_rhs1 (last_stmt);
1667
  oprnd1 = gimple_assign_rhs2 (last_stmt);
1668
  itype = TREE_TYPE (oprnd0);
1669
  if (TREE_CODE (oprnd0) != SSA_NAME
1670
      || TREE_CODE (oprnd1) != INTEGER_CST
1671
      || TREE_CODE (itype) != INTEGER_TYPE
1672
      || TYPE_UNSIGNED (itype)
1673
      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype))
1674
      || !integer_pow2p (oprnd1)
1675
      || tree_int_cst_sgn (oprnd1) != 1)
1676
    return NULL;
1677
 
1678
  vectype = get_vectype_for_scalar_type (itype);
1679
  if (vectype == NULL_TREE)
1680
    return NULL;
1681
 
1682
  /* If the target can handle vectorized division or modulo natively,
1683
     don't attempt to optimize this.  */
1684
  optab = optab_for_tree_code (rhs_code, vectype, optab_default);
1685
  if (optab != NULL)
1686
    {
1687
      enum machine_mode vec_mode = TYPE_MODE (vectype);
1688
      int icode = (int) optab_handler (optab, vec_mode);
1689
      if (icode != CODE_FOR_nothing
1690
          || GET_MODE_SIZE (vec_mode) == UNITS_PER_WORD)
1691
        return NULL;
1692
    }
1693
 
1694
  /* Pattern detected.  */
1695
  if (vect_print_dump_info (REPORT_DETAILS))
1696
    fprintf (vect_dump, "vect_recog_sdivmod_pow2_pattern: detected: ");
1697
 
1698
  cond = build2 (LT_EXPR, boolean_type_node, oprnd0, build_int_cst (itype, 0));
1699
  if (rhs_code == TRUNC_DIV_EXPR)
1700
    {
1701
      tree var = vect_recog_temp_ssa_var (itype, NULL);
1702
      def_stmt
1703
        = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
1704
                                         fold_build2 (MINUS_EXPR, itype,
1705
                                                      oprnd1,
1706
                                                      build_int_cst (itype,
1707
                                                                     1)),
1708
                                         build_int_cst (itype, 0));
1709
      new_pattern_def_seq (stmt_vinfo, def_stmt);
1710
      var = vect_recog_temp_ssa_var (itype, NULL);
1711
      def_stmt
1712
        = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
1713
                                        gimple_assign_lhs (def_stmt));
1714
      append_pattern_def_seq (stmt_vinfo, def_stmt);
1715
 
1716
      pattern_stmt
1717
        = gimple_build_assign_with_ops (RSHIFT_EXPR,
1718
                                        vect_recog_temp_ssa_var (itype, NULL),
1719
                                        var,
1720
                                        build_int_cst (itype,
1721
                                                       tree_log2 (oprnd1)));
1722
    }
1723
  else
1724
    {
1725
      tree signmask;
1726
      STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1727
      if (compare_tree_int (oprnd1, 2) == 0)
1728
        {
1729
          signmask = vect_recog_temp_ssa_var (itype, NULL);
1730
          def_stmt
1731
            = gimple_build_assign_with_ops3 (COND_EXPR, signmask, cond,
1732
                                             build_int_cst (itype, 1),
1733
                                             build_int_cst (itype, 0));
1734
          append_pattern_def_seq (stmt_vinfo, def_stmt);
1735
        }
1736
      else
1737
        {
1738
          tree utype
1739
            = build_nonstandard_integer_type (TYPE_PRECISION (itype), 1);
1740
          tree vecutype = get_vectype_for_scalar_type (utype);
1741
          tree shift
1742
            = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
1743
                                    - tree_log2 (oprnd1));
1744
          tree var = vect_recog_temp_ssa_var (utype, NULL);
1745
          stmt_vec_info def_stmt_vinfo;
1746
 
1747
          def_stmt
1748
            = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
1749
                                             build_int_cst (utype, -1),
1750
                                             build_int_cst (utype, 0));
1751
          def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
1752
          set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1753
          STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
1754
          append_pattern_def_seq (stmt_vinfo, def_stmt);
1755
          var = vect_recog_temp_ssa_var (utype, NULL);
1756
          def_stmt
1757
            = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
1758
                                            gimple_assign_lhs (def_stmt),
1759
                                            shift);
1760
          def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
1761
          set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1762
          STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
1763
          append_pattern_def_seq (stmt_vinfo, def_stmt);
1764
          signmask = vect_recog_temp_ssa_var (itype, NULL);
1765
          def_stmt
1766
            = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
1767
                                            NULL_TREE);
1768
          append_pattern_def_seq (stmt_vinfo, def_stmt);
1769
        }
1770
      def_stmt
1771
        = gimple_build_assign_with_ops (PLUS_EXPR,
1772
                                        vect_recog_temp_ssa_var (itype, NULL),
1773
                                        oprnd0, signmask);
1774
      append_pattern_def_seq (stmt_vinfo, def_stmt);
1775
      def_stmt
1776
        = gimple_build_assign_with_ops (BIT_AND_EXPR,
1777
                                        vect_recog_temp_ssa_var (itype, NULL),
1778
                                        gimple_assign_lhs (def_stmt),
1779
                                        fold_build2 (MINUS_EXPR, itype,
1780
                                                     oprnd1,
1781
                                                     build_int_cst (itype,
1782
                                                                    1)));
1783
      append_pattern_def_seq (stmt_vinfo, def_stmt);
1784
 
1785
      pattern_stmt
1786
        = gimple_build_assign_with_ops (MINUS_EXPR,
1787
                                        vect_recog_temp_ssa_var (itype, NULL),
1788
                                        gimple_assign_lhs (def_stmt),
1789
                                        signmask);
1790
    }
1791
 
1792
  if (vect_print_dump_info (REPORT_DETAILS))
1793
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1794
 
1795
  VEC_safe_push (gimple, heap, *stmts, last_stmt);
1796
 
1797
  *type_in = vectype;
1798
  *type_out = vectype;
1799
  return pattern_stmt;
1800
}
1801
 
1802
/* Function vect_recog_mixed_size_cond_pattern
1803
 
1804
   Try to find the following pattern:
1805
 
1806
     type x_t, y_t;
1807
     TYPE a_T, b_T, c_T;
1808
   loop:
1809
     S1  a_T = x_t CMP y_t ? b_T : c_T;
1810
 
1811
   where type 'TYPE' is an integral type which has different size
1812
   from 'type'.  b_T and c_T are constants and if 'TYPE' is wider
1813
   than 'type', the constants need to fit into an integer type
1814
   with the same width as 'type'.
1815
 
1816
   Input:
1817
 
1818
   * LAST_STMT: A stmt from which the pattern search begins.
1819
 
1820
   Output:
1821
 
1822
   * TYPE_IN: The type of the input arguments to the pattern.
1823
 
1824
   * TYPE_OUT: The type of the output of this pattern.
1825
 
1826
   * Return value: A new stmt that will be used to replace the pattern.
1827
        Additionally a def_stmt is added.
1828
 
1829
        a_it = x_t CMP y_t ? b_it : c_it;
1830
        a_T = (TYPE) a_it;  */
1831
 
1832
static gimple
1833
vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in,
1834
                                    tree *type_out)
1835
{
1836
  gimple last_stmt = VEC_index (gimple, *stmts, 0);
1837
  tree cond_expr, then_clause, else_clause;
1838
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
1839
  tree type, vectype, comp_vectype, itype, vecitype;
1840
  enum machine_mode cmpmode;
1841
  gimple pattern_stmt, def_stmt;
1842
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1843
 
1844
  if (!is_gimple_assign (last_stmt)
1845
      || gimple_assign_rhs_code (last_stmt) != COND_EXPR
1846
      || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
1847
    return NULL;
1848
 
1849
  cond_expr = gimple_assign_rhs1 (last_stmt);
1850
  then_clause = gimple_assign_rhs2 (last_stmt);
1851
  else_clause = gimple_assign_rhs3 (last_stmt);
1852
 
1853
  if (TREE_CODE (then_clause) != INTEGER_CST
1854
      || TREE_CODE (else_clause) != INTEGER_CST)
1855
    return NULL;
1856
 
1857
  if (!COMPARISON_CLASS_P (cond_expr))
1858
    return NULL;
1859
 
1860
  comp_vectype
1861
    = get_vectype_for_scalar_type (TREE_TYPE (TREE_OPERAND (cond_expr, 0)));
1862
  if (comp_vectype == NULL_TREE)
1863
    return NULL;
1864
 
1865
  type = gimple_expr_type (last_stmt);
1866
  cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
1867
 
1868
  if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
1869
    return NULL;
1870
 
1871
  vectype = get_vectype_for_scalar_type (type);
1872
  if (vectype == NULL_TREE)
1873
    return NULL;
1874
 
1875
  if (expand_vec_cond_expr_p (vectype, comp_vectype))
1876
    return NULL;
1877
 
1878
  itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
1879
                                          TYPE_UNSIGNED (type));
1880
  if (itype == NULL_TREE
1881
      || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
1882
    return NULL;
1883
 
1884
  vecitype = get_vectype_for_scalar_type (itype);
1885
  if (vecitype == NULL_TREE)
1886
    return NULL;
1887
 
1888
  if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
1889
    return NULL;
1890
 
1891
  if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
1892
    {
1893
      if (!int_fits_type_p (then_clause, itype)
1894
          || !int_fits_type_p (else_clause, itype))
1895
        return NULL;
1896
    }
1897
 
1898
  def_stmt
1899
    = gimple_build_assign_with_ops3 (COND_EXPR,
1900
                                     vect_recog_temp_ssa_var (itype, NULL),
1901
                                     unshare_expr (cond_expr),
1902
                                     fold_convert (itype, then_clause),
1903
                                     fold_convert (itype, else_clause));
1904
  pattern_stmt
1905
    = gimple_build_assign_with_ops (NOP_EXPR,
1906
                                    vect_recog_temp_ssa_var (type, NULL),
1907
                                    gimple_assign_lhs (def_stmt), NULL_TREE);
1908
 
1909
  new_pattern_def_seq (stmt_vinfo, def_stmt);
1910
  def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
1911
  set_vinfo_for_stmt (def_stmt, def_stmt_info);
1912
  STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
1913
  *type_in = vecitype;
1914
  *type_out = vectype;
1915
 
1916
  return pattern_stmt;
1917
}
1918
 
1919
 
1920
/* Helper function of vect_recog_bool_pattern.  Called recursively, return
1921
   true if bool VAR can be optimized that way.  */
1922
 
1923
static bool
1924
check_bool_pattern (tree var, loop_vec_info loop_vinfo)
1925
{
1926
  gimple def_stmt;
1927
  enum vect_def_type dt;
1928
  tree def, rhs1;
1929
  enum tree_code rhs_code;
1930
 
1931
  if (!vect_is_simple_use (var, NULL, loop_vinfo, NULL, &def_stmt, &def, &dt))
1932
    return false;
1933
 
1934
  if (dt != vect_internal_def)
1935
    return false;
1936
 
1937
  if (!is_gimple_assign (def_stmt))
1938
    return false;
1939
 
1940
  if (!has_single_use (def))
1941
    return false;
1942
 
1943
  rhs1 = gimple_assign_rhs1 (def_stmt);
1944
  rhs_code = gimple_assign_rhs_code (def_stmt);
1945
  switch (rhs_code)
1946
    {
1947
    case SSA_NAME:
1948
      return check_bool_pattern (rhs1, loop_vinfo);
1949
 
1950
    CASE_CONVERT:
1951
      if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
1952
           || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
1953
          && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
1954
        return false;
1955
      return check_bool_pattern (rhs1, loop_vinfo);
1956
 
1957
    case BIT_NOT_EXPR:
1958
      return check_bool_pattern (rhs1, loop_vinfo);
1959
 
1960
    case BIT_AND_EXPR:
1961
    case BIT_IOR_EXPR:
1962
    case BIT_XOR_EXPR:
1963
      if (!check_bool_pattern (rhs1, loop_vinfo))
1964
        return false;
1965
      return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo);
1966
 
1967
    default:
1968
      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
1969
        {
1970
          tree vecitype, comp_vectype;
1971
 
1972
          /* If the comparison can throw, then is_gimple_condexpr will be
1973
             false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
1974
          if (stmt_could_throw_p (def_stmt))
1975
            return false;
1976
 
1977
          comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
1978
          if (comp_vectype == NULL_TREE)
1979
            return false;
1980
 
1981
          if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
1982
            {
1983
              enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
1984
              tree itype
1985
                = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
1986
              vecitype = get_vectype_for_scalar_type (itype);
1987
              if (vecitype == NULL_TREE)
1988
                return false;
1989
            }
1990
          else
1991
            vecitype = comp_vectype;
1992
          return expand_vec_cond_expr_p (vecitype, comp_vectype);
1993
        }
1994
      return false;
1995
    }
1996
}
1997
 
1998
 
1999
/* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
2000
   stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2001
   to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT.  */
2002
 
2003
static tree
2004
adjust_bool_pattern_cast (tree type, tree var)
2005
{
2006
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2007
  gimple cast_stmt, pattern_stmt;
2008
 
2009
  gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2010
  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2011
  new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2012
  cast_stmt
2013
    = gimple_build_assign_with_ops (NOP_EXPR,
2014
                                    vect_recog_temp_ssa_var (type, NULL),
2015
                                    gimple_assign_lhs (pattern_stmt),
2016
                                    NULL_TREE);
2017
  STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2018
  return gimple_assign_lhs (cast_stmt);
2019
}
2020
 
2021
 
2022
/* Helper function of vect_recog_bool_pattern.  Do the actual transformations,
2023
   recursively.  VAR is an SSA_NAME that should be transformed from bool
2024
   to a wider integer type, OUT_TYPE is the desired final integer type of
2025
   the whole pattern, TRUEVAL should be NULL unless optimizing
2026
   BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2027
   in the then_clause, STMTS is where statements with added pattern stmts
2028
   should be pushed to.  */
2029
 
2030
static tree
2031
adjust_bool_pattern (tree var, tree out_type, tree trueval,
2032
                     VEC (gimple, heap) **stmts)
2033
{
2034
  gimple stmt = SSA_NAME_DEF_STMT (var);
2035
  enum tree_code rhs_code, def_rhs_code;
2036
  tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2037
  location_t loc;
2038
  gimple pattern_stmt, def_stmt;
2039
 
2040
  rhs1 = gimple_assign_rhs1 (stmt);
2041
  rhs2 = gimple_assign_rhs2 (stmt);
2042
  rhs_code = gimple_assign_rhs_code (stmt);
2043
  loc = gimple_location (stmt);
2044
  switch (rhs_code)
2045
    {
2046
    case SSA_NAME:
2047
    CASE_CONVERT:
2048
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2049
      itype = TREE_TYPE (irhs1);
2050
      pattern_stmt
2051
        = gimple_build_assign_with_ops (SSA_NAME,
2052
                                        vect_recog_temp_ssa_var (itype, NULL),
2053
                                        irhs1, NULL_TREE);
2054
      break;
2055
 
2056
    case BIT_NOT_EXPR:
2057
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2058
      itype = TREE_TYPE (irhs1);
2059
      pattern_stmt
2060
        = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2061
                                        vect_recog_temp_ssa_var (itype, NULL),
2062
                                        irhs1, build_int_cst (itype, 1));
2063
      break;
2064
 
2065
    case BIT_AND_EXPR:
2066
      /* Try to optimize x = y & (a < b ? 1 : 0); into
2067
         x = (a < b ? y : 0);
2068
 
2069
         E.g. for:
2070
           bool a_b, b_b, c_b;
2071
           TYPE d_T;
2072
 
2073
           S1  a_b = x1 CMP1 y1;
2074
           S2  b_b = x2 CMP2 y2;
2075
           S3  c_b = a_b & b_b;
2076
           S4  d_T = (TYPE) c_b;
2077
 
2078
         we would normally emit:
2079
 
2080
           S1'  a_T = x1 CMP1 y1 ? 1 : 0;
2081
           S2'  b_T = x2 CMP2 y2 ? 1 : 0;
2082
           S3'  c_T = a_T & b_T;
2083
           S4'  d_T = c_T;
2084
 
2085
         but we can save one stmt by using the
2086
         result of one of the COND_EXPRs in the other COND_EXPR and leave
2087
         BIT_AND_EXPR stmt out:
2088
 
2089
           S1'  a_T = x1 CMP1 y1 ? 1 : 0;
2090
           S3'  c_T = x2 CMP2 y2 ? a_T : 0;
2091
           S4'  f_T = c_T;
2092
 
2093
         At least when VEC_COND_EXPR is implemented using masks
2094
         cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
2095
         computes the comparison masks and ands it, in one case with
2096
         all ones vector, in the other case with a vector register.
2097
         Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2098
         often more expensive.  */
2099
      def_stmt = SSA_NAME_DEF_STMT (rhs2);
2100
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
2101
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2102
        {
2103
          tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2104
          irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2105
          if (TYPE_PRECISION (TREE_TYPE (irhs1))
2106
              == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2107
            {
2108
              gimple tstmt;
2109
              stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2110
              irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2111
              tstmt = VEC_pop (gimple, *stmts);
2112
              gcc_assert (tstmt == def_stmt);
2113
              VEC_quick_push (gimple, *stmts, stmt);
2114
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2115
                = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2116
              gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2117
              STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2118
              return irhs2;
2119
            }
2120
          else
2121
            irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2122
          goto and_ior_xor;
2123
        }
2124
      def_stmt = SSA_NAME_DEF_STMT (rhs1);
2125
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
2126
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2127
        {
2128
          tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2129
          irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2130
          if (TYPE_PRECISION (TREE_TYPE (irhs2))
2131
              == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2132
            {
2133
              gimple tstmt;
2134
              stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2135
              irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2136
              tstmt = VEC_pop (gimple, *stmts);
2137
              gcc_assert (tstmt == def_stmt);
2138
              VEC_quick_push (gimple, *stmts, stmt);
2139
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2140
                = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2141
              gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2142
              STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2143
              return irhs1;
2144
            }
2145
          else
2146
            irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2147
          goto and_ior_xor;
2148
        }
2149
      /* FALLTHRU */
2150
    case BIT_IOR_EXPR:
2151
    case BIT_XOR_EXPR:
2152
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2153
      irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2154
    and_ior_xor:
2155
      if (TYPE_PRECISION (TREE_TYPE (irhs1))
2156
          != TYPE_PRECISION (TREE_TYPE (irhs2)))
2157
        {
2158
          int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
2159
          int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
2160
          int out_prec = TYPE_PRECISION (out_type);
2161
          if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
2162
            irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
2163
          else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
2164
            irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
2165
          else
2166
            {
2167
              irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
2168
              irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
2169
            }
2170
        }
2171
      itype = TREE_TYPE (irhs1);
2172
      pattern_stmt
2173
        = gimple_build_assign_with_ops (rhs_code,
2174
                                        vect_recog_temp_ssa_var (itype, NULL),
2175
                                        irhs1, irhs2);
2176
      break;
2177
 
2178
    default:
2179
      gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
2180
      if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
2181
          || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2182
        {
2183
          enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2184
          itype
2185
            = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2186
        }
2187
      else
2188
        itype = TREE_TYPE (rhs1);
2189
      cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
2190
      if (trueval == NULL_TREE)
2191
        trueval = build_int_cst (itype, 1);
2192
      else
2193
        gcc_checking_assert (useless_type_conversion_p (itype,
2194
                                                        TREE_TYPE (trueval)));
2195
      pattern_stmt
2196
        = gimple_build_assign_with_ops3 (COND_EXPR,
2197
                                         vect_recog_temp_ssa_var (itype, NULL),
2198
                                         cond_expr, trueval,
2199
                                         build_int_cst (itype, 0));
2200
      break;
2201
    }
2202
 
2203
  VEC_safe_push (gimple, heap, *stmts, stmt);
2204
  gimple_set_location (pattern_stmt, loc);
2205
  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
2206
  return gimple_assign_lhs (pattern_stmt);
2207
}
2208
 
2209
 
2210
/* Function vect_recog_bool_pattern
2211
 
2212
   Try to find pattern like following:
2213
 
2214
     bool a_b, b_b, c_b, d_b, e_b;
2215
     TYPE f_T;
2216
   loop:
2217
     S1  a_b = x1 CMP1 y1;
2218
     S2  b_b = x2 CMP2 y2;
2219
     S3  c_b = a_b & b_b;
2220
     S4  d_b = x3 CMP3 y3;
2221
     S5  e_b = c_b | d_b;
2222
     S6  f_T = (TYPE) e_b;
2223
 
2224
   where type 'TYPE' is an integral type.
2225
 
2226
   Input:
2227
 
2228
   * LAST_STMT: A stmt at the end from which the pattern
2229
                search begins, i.e. cast of a bool to
2230
                an integer type.
2231
 
2232
   Output:
2233
 
2234
   * TYPE_IN: The type of the input arguments to the pattern.
2235
 
2236
   * TYPE_OUT: The type of the output of this pattern.
2237
 
2238
   * Return value: A new stmt that will be used to replace the pattern.
2239
 
2240
        Assuming size of TYPE is the same as size of all comparisons
2241
        (otherwise some casts would be added where needed), the above
2242
        sequence we create related pattern stmts:
2243
        S1'  a_T = x1 CMP1 y1 ? 1 : 0;
2244
        S3'  c_T = x2 CMP2 y2 ? a_T : 0;
2245
        S4'  d_T = x3 CMP3 y3 ? 1 : 0;
2246
        S5'  e_T = c_T | d_T;
2247
        S6'  f_T = e_T;
2248
 
2249
        Instead of the above S3' we could emit:
2250
        S2'  b_T = x2 CMP2 y2 ? 1 : 0;
2251
        S3'  c_T = a_T | b_T;
2252
        but the above is more efficient.  */
2253
 
2254
static gimple
2255
vect_recog_bool_pattern (VEC (gimple, heap) **stmts, tree *type_in,
2256
                         tree *type_out)
2257
{
2258
  gimple last_stmt = VEC_pop (gimple, *stmts);
2259
  enum tree_code rhs_code;
2260
  tree var, lhs, rhs, vectype;
2261
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2262
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2263
  gimple pattern_stmt;
2264
 
2265
  if (!is_gimple_assign (last_stmt))
2266
    return NULL;
2267
 
2268
  var = gimple_assign_rhs1 (last_stmt);
2269
  lhs = gimple_assign_lhs (last_stmt);
2270
 
2271
  if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2272
       || !TYPE_UNSIGNED (TREE_TYPE (var)))
2273
      && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2274
    return NULL;
2275
 
2276
  rhs_code = gimple_assign_rhs_code (last_stmt);
2277
  if (CONVERT_EXPR_CODE_P (rhs_code))
2278
    {
2279
      if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
2280
          || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
2281
        return NULL;
2282
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2283
      if (vectype == NULL_TREE)
2284
        return NULL;
2285
 
2286
      if (!check_bool_pattern (var, loop_vinfo))
2287
        return NULL;
2288
 
2289
      rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2290
      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2291
      if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2292
        pattern_stmt
2293
          = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2294
      else
2295
        pattern_stmt
2296
          = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2297
      *type_out = vectype;
2298
      *type_in = vectype;
2299
      VEC_safe_push (gimple, heap, *stmts, last_stmt);
2300
      return pattern_stmt;
2301
    }
2302
  else if (rhs_code == SSA_NAME
2303
           && STMT_VINFO_DATA_REF (stmt_vinfo))
2304
    {
2305
      stmt_vec_info pattern_stmt_info;
2306
      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2307
      gcc_assert (vectype != NULL_TREE);
2308
      if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2309
        return NULL;
2310
      if (!check_bool_pattern (var, loop_vinfo))
2311
        return NULL;
2312
 
2313
      rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
2314
      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
2315
      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2316
        {
2317
          tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2318
          gimple cast_stmt
2319
            = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
2320
          new_pattern_def_seq (stmt_vinfo, cast_stmt);
2321
          rhs = rhs2;
2322
        }
2323
      pattern_stmt
2324
        = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2325
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
2326
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2327
      STMT_VINFO_DATA_REF (pattern_stmt_info)
2328
        = STMT_VINFO_DATA_REF (stmt_vinfo);
2329
      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
2330
        = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
2331
      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
2332
      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
2333
        = STMT_VINFO_DR_OFFSET (stmt_vinfo);
2334
      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
2335
      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
2336
        = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
2337
      DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
2338
      *type_out = vectype;
2339
      *type_in = vectype;
2340
      VEC_safe_push (gimple, heap, *stmts, last_stmt);
2341
      return pattern_stmt;
2342
    }
2343
  else
2344
    return NULL;
2345
}
2346
 
2347
 
2348
/* Mark statements that are involved in a pattern.  */
2349
 
2350
static inline void
2351
vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
2352
                         tree pattern_vectype)
2353
{
2354
  stmt_vec_info pattern_stmt_info, def_stmt_info;
2355
  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
2356
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
2357
  gimple def_stmt;
2358
 
2359
  pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
2360
  if (pattern_stmt_info == NULL)
2361
    {
2362
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
2363
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2364
    }
2365
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
2366
 
2367
  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
2368
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
2369
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2370
  STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
2371
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
2372
  STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
2373
  STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
2374
    = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
2375
  if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
2376
    {
2377
      gimple_stmt_iterator si;
2378
      for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
2379
           !gsi_end_p (si); gsi_next (&si))
2380
        {
2381
          def_stmt = gsi_stmt (si);
2382
          def_stmt_info = vinfo_for_stmt (def_stmt);
2383
          if (def_stmt_info == NULL)
2384
            {
2385
              def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
2386
              set_vinfo_for_stmt (def_stmt, def_stmt_info);
2387
            }
2388
          gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
2389
          STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
2390
          STMT_VINFO_DEF_TYPE (def_stmt_info)
2391
            = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2392
          if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
2393
            STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
2394
        }
2395
    }
2396
}
2397
 
2398
/* Function vect_pattern_recog_1
2399
 
2400
   Input:
2401
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
2402
        computation pattern.
2403
   STMT: A stmt from which the pattern search should start.
2404
 
2405
   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
2406
   expression that computes the same functionality and can be used to
2407
   replace the sequence of stmts that are involved in the pattern.
2408
 
2409
   Output:
2410
   This function checks if the expression returned by PATTERN_RECOG_FUNC is
2411
   supported in vector form by the target.  We use 'TYPE_IN' to obtain the
2412
   relevant vector type. If 'TYPE_IN' is already a vector type, then this
2413
   indicates that target support had already been checked by PATTERN_RECOG_FUNC.
2414
   If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
2415
   to the available target pattern.
2416
 
2417
   This function also does some bookkeeping, as explained in the documentation
2418
   for vect_recog_pattern.  */
2419
 
2420
static void
2421
vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
2422
                      gimple_stmt_iterator si,
2423
                      VEC (gimple, heap) **stmts_to_replace)
2424
{
2425
  gimple stmt = gsi_stmt (si), pattern_stmt;
2426
  stmt_vec_info stmt_info;
2427
  loop_vec_info loop_vinfo;
2428
  tree pattern_vectype;
2429
  tree type_in, type_out;
2430
  enum tree_code code;
2431
  int i;
2432
  gimple next;
2433
 
2434
  VEC_truncate (gimple, *stmts_to_replace, 0);
2435
  VEC_quick_push (gimple, *stmts_to_replace, stmt);
2436
  pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
2437
  if (!pattern_stmt)
2438
    return;
2439
 
2440
  stmt = VEC_last (gimple, *stmts_to_replace);
2441
  stmt_info = vinfo_for_stmt (stmt);
2442
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2443
 
2444
  if (VECTOR_MODE_P (TYPE_MODE (type_in)))
2445
    {
2446
      /* No need to check target support (already checked by the pattern
2447
         recognition function).  */
2448
      pattern_vectype = type_out ? type_out : type_in;
2449
    }
2450
  else
2451
    {
2452
      enum machine_mode vec_mode;
2453
      enum insn_code icode;
2454
      optab optab;
2455
 
2456
      /* Check target support  */
2457
      type_in = get_vectype_for_scalar_type (type_in);
2458
      if (!type_in)
2459
        return;
2460
      if (type_out)
2461
        type_out = get_vectype_for_scalar_type (type_out);
2462
      else
2463
        type_out = type_in;
2464
      if (!type_out)
2465
        return;
2466
      pattern_vectype = type_out;
2467
 
2468
      if (is_gimple_assign (pattern_stmt))
2469
        code = gimple_assign_rhs_code (pattern_stmt);
2470
      else
2471
        {
2472
          gcc_assert (is_gimple_call (pattern_stmt));
2473
          code = CALL_EXPR;
2474
        }
2475
 
2476
      optab = optab_for_tree_code (code, type_in, optab_default);
2477
      vec_mode = TYPE_MODE (type_in);
2478
      if (!optab
2479
          || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
2480
          || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
2481
        return;
2482
    }
2483
 
2484
  /* Found a vectorizable pattern.  */
2485
  if (vect_print_dump_info (REPORT_DETAILS))
2486
    {
2487
      fprintf (vect_dump, "pattern recognized: ");
2488
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
2489
    }
2490
 
2491
  /* Mark the stmts that are involved in the pattern. */
2492
  vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
2493
 
2494
  /* Patterns cannot be vectorized using SLP, because they change the order of
2495
     computation.  */
2496
  FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
2497
    if (next == stmt)
2498
      VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
2499
 
2500
  /* It is possible that additional pattern stmts are created and inserted in
2501
     STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
2502
     relevant statements.  */
2503
  for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt)
2504
              && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1);
2505
       i++)
2506
    {
2507
      stmt_info = vinfo_for_stmt (stmt);
2508
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2509
      if (vect_print_dump_info (REPORT_DETAILS))
2510
        {
2511
          fprintf (vect_dump, "additional pattern stmt: ");
2512
          print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
2513
        }
2514
 
2515
      vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
2516
    }
2517
}
2518
 
2519
 
2520
/* Function vect_pattern_recog
2521
 
2522
   Input:
2523
   LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
2524
        computation idioms.
2525
 
2526
   Output - for each computation idiom that is detected we create a new stmt
2527
        that provides the same functionality and that can be vectorized.  We
2528
        also record some information in the struct_stmt_info of the relevant
2529
        stmts, as explained below:
2530
 
2531
   At the entry to this function we have the following stmts, with the
2532
   following initial value in the STMT_VINFO fields:
2533
 
2534
         stmt                     in_pattern_p  related_stmt    vec_stmt
2535
         S1: a_i = ....                 -       -               -
2536
         S2: a_2 = ..use(a_i)..         -       -               -
2537
         S3: a_1 = ..use(a_2)..         -       -               -
2538
         S4: a_0 = ..use(a_1)..         -       -               -
2539
         S5: ... = ..use(a_0)..         -       -               -
2540
 
2541
   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
2542
   represented by a single stmt.  We then:
2543
   - create a new stmt S6 equivalent to the pattern (the stmt is not
2544
     inserted into the code)
2545
   - fill in the STMT_VINFO fields as follows:
2546
 
2547
                                  in_pattern_p  related_stmt    vec_stmt
2548
         S1: a_i = ....                 -       -               -
2549
         S2: a_2 = ..use(a_i)..         -       -               -
2550
         S3: a_1 = ..use(a_2)..         -       -               -
2551
         S4: a_0 = ..use(a_1)..         true    S6              -
2552
          '---> S6: a_new = ....        -       S4              -
2553
         S5: ... = ..use(a_0)..         -       -               -
2554
 
2555
   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
2556
   to each other through the RELATED_STMT field).
2557
 
2558
   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
2559
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
2560
   remain irrelevant unless used by stmts other than S4.
2561
 
2562
   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
2563
   (because they are marked as irrelevant).  It will vectorize S6, and record
2564
   a pointer to the new vector stmt VS6 from S6 (as usual).
2565
   S4 will be skipped, and S5 will be vectorized as usual:
2566
 
2567
                                  in_pattern_p  related_stmt    vec_stmt
2568
         S1: a_i = ....                 -       -               -
2569
         S2: a_2 = ..use(a_i)..         -       -               -
2570
         S3: a_1 = ..use(a_2)..         -       -               -
2571
       > VS6: va_new = ....             -       -               -
2572
         S4: a_0 = ..use(a_1)..         true    S6              VS6
2573
          '---> S6: a_new = ....        -       S4              VS6
2574
       > VS5: ... = ..vuse(va_new)..    -       -               -
2575
         S5: ... = ..use(a_0)..         -       -               -
2576
 
2577
   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
2578
   elsewhere), and we'll end up with:
2579
 
2580
        VS6: va_new = ....
2581
        VS5: ... = ..vuse(va_new)..
2582
 
2583
   In case of more than one pattern statements, e.g., widen-mult with
2584
   intermediate type:
2585
 
2586
     S1  a_t = ;
2587
     S2  a_T = (TYPE) a_t;
2588
           '--> S3: a_it = (interm_type) a_t;
2589
     S4  prod_T = a_T * CONST;
2590
           '--> S5: prod_T' = a_it w* CONST;
2591
 
2592
   there may be other users of a_T outside the pattern.  In that case S2 will
2593
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
2594
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
2595
   be recorded in S3.  */
2596
 
2597
void
2598
vect_pattern_recog (loop_vec_info loop_vinfo)
2599
{
2600
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2601
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2602
  unsigned int nbbs = loop->num_nodes;
2603
  gimple_stmt_iterator si;
2604
  unsigned int i, j;
2605
  vect_recog_func_ptr vect_recog_func;
2606
  VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
2607
 
2608
  if (vect_print_dump_info (REPORT_DETAILS))
2609
    fprintf (vect_dump, "=== vect_pattern_recog ===");
2610
 
2611
  /* Scan through the loop stmts, applying the pattern recognition
2612
     functions starting at each stmt visited:  */
2613
  for (i = 0; i < nbbs; i++)
2614
    {
2615
      basic_block bb = bbs[i];
2616
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2617
        {
2618
          /* Scan over all generic vect_recog_xxx_pattern functions.  */
2619
          for (j = 0; j < NUM_PATTERNS; j++)
2620
            {
2621
              vect_recog_func = vect_vect_recog_func_ptrs[j];
2622
              vect_pattern_recog_1 (vect_recog_func, si,
2623
                                    &stmts_to_replace);
2624
            }
2625
        }
2626
    }
2627
 
2628
  VEC_free (gimple, heap, stmts_to_replace);
2629
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.