OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [gcc/] [ree.c] - Blame information for rev 801

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 684 jeremybenn
/* Redundant Extension Elimination pass for the GNU compiler.
2
   Copyright (C) 2010, 2011, 2012 Free Software Foundation, Inc.
3
   Contributed by Ilya Enkovich (ilya.enkovich@intel.com)
4
 
5
   Based on the Redundant Zero-extension elimination pass contributed by
6
   Sriraman Tallam (tmsriram@google.com) and Silvius Rus (rus@google.com).
7
 
8
This file is part of GCC.
9
 
10
GCC is free software; you can redistribute it and/or modify it under
11
the terms of the GNU General Public License as published by the Free
12
Software Foundation; either version 3, or (at your option) any later
13
version.
14
 
15
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16
WARRANTY; without even the implied warranty of MERCHANTABILITY or
17
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
18
for more details.
19
 
20
You should have received a copy of the GNU General Public License
21
along with GCC; see the file COPYING3.  If not see
22
<http://www.gnu.org/licenses/>.  */
23
 
24
 
25
/* Problem Description :
26
   --------------------
27
   This pass is intended to remove redundant extension instructions.
28
   Such instructions appear for different reasons.  We expect some of
29
   them due to implicit zero-extension in 64-bit registers after writing
30
   to their lower 32-bit half (e.g. for the x86-64 architecture).
31
   Another possible reason is a type cast which follows a load (for
32
   instance a register restore) and which can be combined into a single
33
   instruction, and for which earlier local passes, e.g. the combiner,
34
   weren't able to optimize.
35
 
36
   How does this pass work  ?
37
   --------------------------
38
 
39
   This pass is run after register allocation.  Hence, all registers that
40
   this pass deals with are hard registers.  This pass first looks for an
41
   extension instruction that could possibly be redundant.  Such extension
42
   instructions show up in RTL with the pattern  :
43
   (set (reg:<SWI248> x) (any_extend:<SWI248> (reg:<SWI124> x))),
44
   where x can be any hard register.
45
   Now, this pass tries to eliminate this instruction by merging the
46
   extension with the definitions of register x.  For instance, if
47
   one of the definitions of register x was  :
48
   (set (reg:SI x) (plus:SI (reg:SI z1) (reg:SI z2))),
49
   followed by extension  :
50
   (set (reg:DI x) (zero_extend:DI (reg:SI x)))
51
   then the combination converts this into :
52
   (set (reg:DI x) (zero_extend:DI (plus:SI (reg:SI z1) (reg:SI z2)))).
53
   If all the merged definitions are recognizable assembly instructions,
54
   the extension is effectively eliminated.
55
 
56
   For example, for the x86-64 architecture, implicit zero-extensions
57
   are captured with appropriate patterns in the i386.md file.  Hence,
58
   these merged definition can be matched to a single assembly instruction.
59
   The original extension instruction is then deleted if all the
60
   definitions can be merged.
61
 
62
   However, there are cases where the definition instruction cannot be
63
   merged with an extension.  Examples are CALL instructions.  In such
64
   cases, the original extension is not redundant and this pass does
65
   not delete it.
66
 
67
   Handling conditional moves :
68
   ----------------------------
69
 
70
   Architectures like x86-64 support conditional moves whose semantics for
71
   extension differ from the other instructions.  For instance, the
72
   instruction *cmov ebx, eax*
73
   zero-extends eax onto rax only when the move from ebx to eax happens.
74
   Otherwise, eax may not be zero-extended.  Consider conditional moves as
75
   RTL instructions of the form
76
   (set (reg:SI x) (if_then_else (cond) (reg:SI y) (reg:SI z))).
77
   This pass tries to merge an extension with a conditional move by
78
   actually merging the definitions of y and z with an extension and then
79
   converting the conditional move into :
80
   (set (reg:DI x) (if_then_else (cond) (reg:DI y) (reg:DI z))).
81
   Since registers y and z are extended, register x will also be extended
82
   after the conditional move.  Note that this step has to be done
83
   transitively since the definition of a conditional copy can be
84
   another conditional copy.
85
 
86
   Motivating Example I :
87
   ---------------------
88
   For this program :
89
   **********************************************
90
   bad_code.c
91
 
92
   int mask[1000];
93
 
94
   int foo(unsigned x)
95
   {
96
     if (x < 10)
97
       x = x * 45;
98
     else
99
       x = x * 78;
100
     return mask[x];
101
   }
102
   **********************************************
103
 
104
   $ gcc -O2 bad_code.c
105
     ........
106
     400315:       b8 4e 00 00 00          mov    $0x4e,%eax
107
     40031a:       0f af f8                imul   %eax,%edi
108
     40031d:       89 ff                   mov    %edi,%edi - useless extension
109
     40031f:       8b 04 bd 60 19 40 00    mov    0x401960(,%rdi,4),%eax
110
     400326:       c3                      retq
111
     ......
112
     400330:       ba 2d 00 00 00          mov    $0x2d,%edx
113
     400335:       0f af fa                imul   %edx,%edi
114
     400338:       89 ff                   mov    %edi,%edi - useless extension
115
     40033a:       8b 04 bd 60 19 40 00    mov    0x401960(,%rdi,4),%eax
116
     400341:       c3                      retq
117
 
118
   $ gcc -O2 -free bad_code.c
119
     ......
120
     400315:       6b ff 4e                imul   $0x4e,%edi,%edi
121
     400318:       8b 04 bd 40 19 40 00    mov    0x401940(,%rdi,4),%eax
122
     40031f:       c3                      retq
123
     400320:       6b ff 2d                imul   $0x2d,%edi,%edi
124
     400323:       8b 04 bd 40 19 40 00    mov    0x401940(,%rdi,4),%eax
125
     40032a:       c3                      retq
126
 
127
   Motivating Example II :
128
   ---------------------
129
 
130
   Here is an example with a conditional move.
131
 
132
   For this program :
133
   **********************************************
134
 
135
   unsigned long long foo(unsigned x , unsigned y)
136
   {
137
     unsigned z;
138
     if (x > 100)
139
       z = x + y;
140
     else
141
       z = x - y;
142
     return (unsigned long long)(z);
143
   }
144
 
145
   $ gcc -O2 bad_code.c
146
     ............
147
     400360:       8d 14 3e                lea    (%rsi,%rdi,1),%edx
148
     400363:       89 f8                   mov    %edi,%eax
149
     400365:       29 f0                   sub    %esi,%eax
150
     400367:       83 ff 65                cmp    $0x65,%edi
151
     40036a:       0f 43 c2                cmovae %edx,%eax
152
     40036d:       89 c0                   mov    %eax,%eax - useless extension
153
     40036f:       c3                      retq
154
 
155
   $ gcc -O2 -free bad_code.c
156
     .............
157
     400360:       89 fa                   mov    %edi,%edx
158
     400362:       8d 04 3e                lea    (%rsi,%rdi,1),%eax
159
     400365:       29 f2                   sub    %esi,%edx
160
     400367:       83 ff 65                cmp    $0x65,%edi
161
     40036a:       89 d6                   mov    %edx,%esi
162
     40036c:       48 0f 42 c6             cmovb  %rsi,%rax
163
     400370:       c3                      retq
164
 
165
  Motivating Example III :
166
  ---------------------
167
 
168
  Here is an example with a type cast.
169
 
170
  For this program :
171
  **********************************************
172
 
173
  void test(int size, unsigned char *in, unsigned char *out)
174
  {
175
    int i;
176
    unsigned char xr, xg, xy=0;
177
 
178
    for (i = 0; i < size; i++) {
179
      xr = *in++;
180
      xg = *in++;
181
      xy = (unsigned char) ((19595*xr + 38470*xg) >> 16);
182
      *out++ = xy;
183
    }
184
  }
185
 
186
  $ gcc -O2 bad_code.c
187
    ............
188
    10:   0f b6 0e                movzbl (%rsi),%ecx
189
    13:   0f b6 46 01             movzbl 0x1(%rsi),%eax
190
    17:   48 83 c6 02             add    $0x2,%rsi
191
    1b:   0f b6 c9                movzbl %cl,%ecx - useless extension
192
    1e:   0f b6 c0                movzbl %al,%eax - useless extension
193
    21:   69 c9 8b 4c 00 00       imul   $0x4c8b,%ecx,%ecx
194
    27:   69 c0 46 96 00 00       imul   $0x9646,%eax,%eax
195
 
196
   $ gcc -O2 -free bad_code.c
197
     .............
198
    10:   0f b6 0e                movzbl (%rsi),%ecx
199
    13:   0f b6 46 01             movzbl 0x1(%rsi),%eax
200
    17:   48 83 c6 02             add    $0x2,%rsi
201
    1b:   69 c9 8b 4c 00 00       imul   $0x4c8b,%ecx,%ecx
202
    21:   69 c0 46 96 00 00       imul   $0x9646,%eax,%eax
203
 
204
   Usefulness :
205
   ----------
206
 
207
   The original redundant zero-extension elimination pass reported reduction
208
   of the dynamic instruction count of a compression benchmark by 2.8% and
209
   improvement of its run time by about 1%.
210
 
211
   The additional performance gain with the enhanced pass is mostly expected
212
   on in-order architectures where redundancy cannot be compensated by out of
213
   order execution.  Measurements showed up to 10% performance gain (reduced
214
   run time) on EEMBC 2.0 benchmarks on Atom processor with geomean performance
215
   gain 1%.  */
216
 
217
 
218
#include "config.h"
219
#include "system.h"
220
#include "coretypes.h"
221
#include "tm.h"
222
#include "rtl.h"
223
#include "tree.h"
224
#include "tm_p.h"
225
#include "flags.h"
226
#include "regs.h"
227
#include "hard-reg-set.h"
228
#include "basic-block.h"
229
#include "insn-config.h"
230
#include "function.h"
231
#include "expr.h"
232
#include "insn-attr.h"
233
#include "recog.h"
234
#include "diagnostic-core.h"
235
#include "target.h"
236
#include "timevar.h"
237
#include "optabs.h"
238
#include "insn-codes.h"
239
#include "rtlhooks-def.h"
240
/* Include output.h for dump_file.  */
241
#include "output.h"
242
#include "params.h"
243
#include "timevar.h"
244
#include "tree-pass.h"
245
#include "df.h"
246
#include "cgraph.h"
247
 
248
/* This structure represents a candidate for elimination.  */
249
 
250
typedef struct GTY(()) ext_cand
251
{
252
  /* The expression.  */
253
  const_rtx expr;
254
 
255
  /* The kind of extension.  */
256
  enum rtx_code code;
257
 
258
  /* The destination mode.  */
259
  enum machine_mode mode;
260
 
261
  /* The instruction where it lives.  */
262
  rtx insn;
263
} ext_cand;
264
 
265
DEF_VEC_O(ext_cand);
266
DEF_VEC_ALLOC_O(ext_cand, heap);
267
 
268
static int max_insn_uid;
269
 
270
/* Given a insn (CURR_INSN), an extension candidate for removal (CAND)
271
   and a pointer to the SET rtx (ORIG_SET) that needs to be modified,
272
   this code modifies the SET rtx to a new SET rtx that extends the
273
   right hand expression into a register on the left hand side.  Note
274
   that multiple assumptions are made about the nature of the set that
275
   needs to be true for this to work and is called from merge_def_and_ext.
276
 
277
   Original :
278
   (set (reg a) (expression))
279
 
280
   Transform :
281
   (set (reg a) (any_extend (expression)))
282
 
283
   Special Cases :
284
   If the expression is a constant or another extension, then directly
285
   assign it to the register.  */
286
 
287
static bool
288
combine_set_extension (ext_cand *cand, rtx curr_insn, rtx *orig_set)
289
{
290
  rtx orig_src = SET_SRC (*orig_set);
291
  rtx new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (*orig_set)));
292
  rtx new_set;
293
 
294
  /* Merge constants by directly moving the constant into the register under
295
     some conditions.  Recall that RTL constants are sign-extended.  */
296
  if (GET_CODE (orig_src) == CONST_INT
297
      && HOST_BITS_PER_WIDE_INT >= GET_MODE_BITSIZE (cand->mode))
298
    {
299
      if (INTVAL (orig_src) >= 0 || cand->code == SIGN_EXTEND)
300
        new_set = gen_rtx_SET (VOIDmode, new_reg, orig_src);
301
      else
302
        {
303
          /* Zero-extend the negative constant by masking out the bits outside
304
             the source mode.  */
305
          enum machine_mode src_mode = GET_MODE (SET_DEST (*orig_set));
306
          rtx new_const_int
307
            = GEN_INT (INTVAL (orig_src) & GET_MODE_MASK (src_mode));
308
          new_set = gen_rtx_SET (VOIDmode, new_reg, new_const_int);
309
        }
310
    }
311
  else if (GET_MODE (orig_src) == VOIDmode)
312
    {
313
      /* This is mostly due to a call insn that should not be optimized.  */
314
      return false;
315
    }
316
  else if (GET_CODE (orig_src) == cand->code)
317
    {
318
      /* Here is a sequence of two extensions.  Try to merge them.  */
319
      rtx temp_extension
320
        = gen_rtx_fmt_e (cand->code, cand->mode, XEXP (orig_src, 0));
321
      rtx simplified_temp_extension = simplify_rtx (temp_extension);
322
      if (simplified_temp_extension)
323
        temp_extension = simplified_temp_extension;
324
      new_set = gen_rtx_SET (VOIDmode, new_reg, temp_extension);
325
    }
326
  else if (GET_CODE (orig_src) == IF_THEN_ELSE)
327
    {
328
      /* Only IF_THEN_ELSE of phi-type copies are combined.  Otherwise,
329
         in general, IF_THEN_ELSE should not be combined.  */
330
      return false;
331
    }
332
  else
333
    {
334
      /* This is the normal case.  */
335
      rtx temp_extension
336
        = gen_rtx_fmt_e (cand->code, cand->mode, orig_src);
337
      rtx simplified_temp_extension = simplify_rtx (temp_extension);
338
      if (simplified_temp_extension)
339
        temp_extension = simplified_temp_extension;
340
      new_set = gen_rtx_SET (VOIDmode, new_reg, temp_extension);
341
    }
342
 
343
  /* This change is a part of a group of changes.  Hence,
344
     validate_change will not try to commit the change.  */
345
  if (validate_change (curr_insn, orig_set, new_set, true))
346
    {
347
      if (dump_file)
348
        {
349
          fprintf (dump_file,
350
                   "Tentatively merged extension with definition:\n");
351
          print_rtl_single (dump_file, curr_insn);
352
        }
353
      return true;
354
    }
355
 
356
  return false;
357
}
358
 
359
/* Treat if_then_else insns, where the operands of both branches
360
   are registers, as copies.  For instance,
361
   Original :
362
   (set (reg:SI a) (if_then_else (cond) (reg:SI b) (reg:SI c)))
363
   Transformed :
364
   (set (reg:DI a) (if_then_else (cond) (reg:DI b) (reg:DI c)))
365
   DEF_INSN is the if_then_else insn.  */
366
 
367
static bool
368
transform_ifelse (ext_cand *cand, rtx def_insn)
369
{
370
  rtx set_insn = PATTERN (def_insn);
371
  rtx srcreg, dstreg, srcreg2;
372
  rtx map_srcreg, map_dstreg, map_srcreg2;
373
  rtx ifexpr;
374
  rtx cond;
375
  rtx new_set;
376
 
377
  gcc_assert (GET_CODE (set_insn) == SET);
378
 
379
  cond = XEXP (SET_SRC (set_insn), 0);
380
  dstreg = SET_DEST (set_insn);
381
  srcreg = XEXP (SET_SRC (set_insn), 1);
382
  srcreg2 = XEXP (SET_SRC (set_insn), 2);
383
  /* If the conditional move already has the right or wider mode,
384
     there is nothing to do.  */
385
  if (GET_MODE_SIZE (GET_MODE (dstreg)) >= GET_MODE_SIZE (cand->mode))
386
    return true;
387
 
388
  map_srcreg = gen_rtx_REG (cand->mode, REGNO (srcreg));
389
  map_srcreg2 = gen_rtx_REG (cand->mode, REGNO (srcreg2));
390
  map_dstreg = gen_rtx_REG (cand->mode, REGNO (dstreg));
391
  ifexpr = gen_rtx_IF_THEN_ELSE (cand->mode, cond, map_srcreg, map_srcreg2);
392
  new_set = gen_rtx_SET (VOIDmode, map_dstreg, ifexpr);
393
 
394
  if (validate_change (def_insn, &PATTERN (def_insn), new_set, true))
395
    {
396
      if (dump_file)
397
        {
398
          fprintf (dump_file,
399
                   "Mode of conditional move instruction extended:\n");
400
          print_rtl_single (dump_file, def_insn);
401
        }
402
      return true;
403
    }
404
 
405
  return false;
406
}
407
 
408
/* Get all the reaching definitions of an instruction.  The definitions are
409
   desired for REG used in INSN.  Return the definition list or NULL if a
410
   definition is missing.  If DEST is non-NULL, additionally push the INSN
411
   of the definitions onto DEST.  */
412
 
413
static struct df_link *
414
get_defs (rtx insn, rtx reg, VEC (rtx,heap) **dest)
415
{
416
  df_ref reg_info, *uses;
417
  struct df_link *ref_chain, *ref_link;
418
 
419
  reg_info = NULL;
420
 
421
  for (uses = DF_INSN_USES (insn); *uses; uses++)
422
    {
423
      reg_info = *uses;
424
      if (GET_CODE (DF_REF_REG (reg_info)) == SUBREG)
425
        return NULL;
426
      if (REGNO (DF_REF_REG (reg_info)) == REGNO (reg))
427
        break;
428
    }
429
 
430
  gcc_assert (reg_info != NULL && uses != NULL);
431
 
432
  ref_chain = DF_REF_CHAIN (reg_info);
433
 
434
  for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
435
    {
436
      /* Problem getting some definition for this instruction.  */
437
      if (ref_link->ref == NULL)
438
        return NULL;
439
      if (DF_REF_INSN_INFO (ref_link->ref) == NULL)
440
        return NULL;
441
    }
442
 
443
  if (dest)
444
    for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
445
      VEC_safe_push (rtx, heap, *dest, DF_REF_INSN (ref_link->ref));
446
 
447
  return ref_chain;
448
}
449
 
450
/* Return true if INSN is
451
     (SET (reg REGNO (def_reg)) (if_then_else (cond) (REG x1) (REG x2)))
452
   and store x1 and x2 in REG_1 and REG_2.  */
453
 
454
static bool
455
is_cond_copy_insn (rtx insn, rtx *reg1, rtx *reg2)
456
{
457
  rtx expr = single_set (insn);
458
 
459
  if (expr != NULL_RTX
460
      && GET_CODE (expr) == SET
461
      && GET_CODE (SET_DEST (expr)) == REG
462
      && GET_CODE (SET_SRC (expr))  == IF_THEN_ELSE
463
      && GET_CODE (XEXP (SET_SRC (expr), 1)) == REG
464
      && GET_CODE (XEXP (SET_SRC (expr), 2)) == REG)
465
    {
466
      *reg1 = XEXP (SET_SRC (expr), 1);
467
      *reg2 = XEXP (SET_SRC (expr), 2);
468
      return true;
469
    }
470
 
471
  return false;
472
}
473
 
474
enum ext_modified_kind
475
{
476
  /* The insn hasn't been modified by ree pass yet.  */
477
  EXT_MODIFIED_NONE,
478
  /* Changed into zero extension.  */
479
  EXT_MODIFIED_ZEXT,
480
  /* Changed into sign extension.  */
481
  EXT_MODIFIED_SEXT
482
};
483
 
484
struct ext_modified
485
{
486
  /* Mode from which ree has zero or sign extended the destination.  */
487
  ENUM_BITFIELD(machine_mode) mode : 8;
488
 
489
  /* Kind of modification of the insn.  */
490
  ENUM_BITFIELD(ext_modified_kind) kind : 2;
491
 
492
  /* True if the insn is scheduled to be deleted.  */
493
  unsigned int deleted : 1;
494
};
495
 
496
/* Vectors used by combine_reaching_defs and its helpers.  */
497
typedef struct ext_state
498
{
499
  /* In order to avoid constant VEC_alloc/VEC_free, we keep these
500
     4 vectors live through the entire find_and_remove_re and just
501
     VEC_truncate them each time.  */
502
  VEC (rtx, heap) *defs_list;
503
  VEC (rtx, heap) *copies_list;
504
  VEC (rtx, heap) *modified_list;
505
  VEC (rtx, heap) *work_list;
506
 
507
  /* For instructions that have been successfully modified, this is
508
     the original mode from which the insn is extending and
509
     kind of extension.  */
510
  struct ext_modified *modified;
511
} ext_state;
512
 
513
/* Reaching Definitions of the extended register could be conditional copies
514
   or regular definitions.  This function separates the two types into two
515
   lists, STATE->DEFS_LIST and STATE->COPIES_LIST.  This is necessary because,
516
   if a reaching definition is a conditional copy, merging the extension with
517
   this definition is wrong.  Conditional copies are merged by transitively
518
   merging their definitions.  The defs_list is populated with all the reaching
519
   definitions of the extension instruction (EXTEND_INSN) which must be merged
520
   with an extension.  The copies_list contains all the conditional moves that
521
   will later be extended into a wider mode conditional move if all the merges
522
   are successful.  The function returns false upon failure, true upon
523
   success.  */
524
 
525
static bool
526
make_defs_and_copies_lists (rtx extend_insn, const_rtx set_pat,
527
                            ext_state *state)
528
{
529
  rtx src_reg = XEXP (SET_SRC (set_pat), 0);
530
  bool *is_insn_visited;
531
  bool ret = true;
532
 
533
  VEC_truncate (rtx, state->work_list, 0);
534
 
535
  /* Initialize the work list.  */
536
  if (!get_defs (extend_insn, src_reg, &state->work_list))
537
    gcc_unreachable ();
538
 
539
  is_insn_visited = XCNEWVEC (bool, max_insn_uid);
540
 
541
  /* Perform transitive closure for conditional copies.  */
542
  while (!VEC_empty (rtx, state->work_list))
543
    {
544
      rtx def_insn = VEC_pop (rtx, state->work_list);
545
      rtx reg1, reg2;
546
 
547
      gcc_assert (INSN_UID (def_insn) < max_insn_uid);
548
 
549
      if (is_insn_visited[INSN_UID (def_insn)])
550
        continue;
551
      is_insn_visited[INSN_UID (def_insn)] = true;
552
 
553
      if (is_cond_copy_insn (def_insn, &reg1, &reg2))
554
        {
555
          /* Push it onto the copy list first.  */
556
          VEC_safe_push (rtx, heap, state->copies_list, def_insn);
557
 
558
          /* Now perform the transitive closure.  */
559
          if (!get_defs (def_insn, reg1, &state->work_list)
560
              || !get_defs (def_insn, reg2, &state->work_list))
561
            {
562
              ret = false;
563
              break;
564
            }
565
        }
566
      else
567
        VEC_safe_push (rtx, heap, state->defs_list, def_insn);
568
    }
569
 
570
  XDELETEVEC (is_insn_visited);
571
 
572
  return ret;
573
}
574
 
575
/* Merge the DEF_INSN with an extension.  Calls combine_set_extension
576
   on the SET pattern.  */
577
 
578
static bool
579
merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state *state)
580
{
581
  enum machine_mode ext_src_mode;
582
  enum rtx_code code;
583
  rtx *sub_rtx;
584
  rtx s_expr;
585
  int i;
586
 
587
  ext_src_mode = GET_MODE (XEXP (SET_SRC (cand->expr), 0));
588
  code = GET_CODE (PATTERN (def_insn));
589
  sub_rtx = NULL;
590
 
591
  if (code == PARALLEL)
592
    {
593
      for (i = 0; i < XVECLEN (PATTERN (def_insn), 0); i++)
594
        {
595
          s_expr = XVECEXP (PATTERN (def_insn), 0, i);
596
          if (GET_CODE (s_expr) != SET)
597
            continue;
598
 
599
          if (sub_rtx == NULL)
600
            sub_rtx = &XVECEXP (PATTERN (def_insn), 0, i);
601
          else
602
            {
603
              /* PARALLEL with multiple SETs.  */
604
              return false;
605
            }
606
        }
607
    }
608
  else if (code == SET)
609
    sub_rtx = &PATTERN (def_insn);
610
  else
611
    {
612
      /* It is not a PARALLEL or a SET, what could it be ? */
613
      return false;
614
    }
615
 
616
  gcc_assert (sub_rtx != NULL);
617
 
618
  if (REG_P (SET_DEST (*sub_rtx))
619
      && (GET_MODE (SET_DEST (*sub_rtx)) == ext_src_mode
620
          || ((state->modified[INSN_UID (def_insn)].kind
621
               == (cand->code == ZERO_EXTEND
622
                   ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT))
623
              && state->modified[INSN_UID (def_insn)].mode
624
                 == ext_src_mode)))
625
    {
626
      if (GET_MODE_SIZE (GET_MODE (SET_DEST (*sub_rtx)))
627
          >= GET_MODE_SIZE (cand->mode))
628
        return true;
629
      /* If def_insn is already scheduled to be deleted, don't attempt
630
         to modify it.  */
631
      if (state->modified[INSN_UID (def_insn)].deleted)
632
        return false;
633
      if (combine_set_extension (cand, def_insn, sub_rtx))
634
        {
635
          if (state->modified[INSN_UID (def_insn)].kind == EXT_MODIFIED_NONE)
636
            state->modified[INSN_UID (def_insn)].mode = ext_src_mode;
637
          return true;
638
        }
639
    }
640
 
641
  return false;
642
}
643
 
644
/* This function goes through all reaching defs of the source
645
   of the candidate for elimination (CAND) and tries to combine
646
   the extension with the definition instruction.  The changes
647
   are made as a group so that even if one definition cannot be
648
   merged, all reaching definitions end up not being merged.
649
   When a conditional copy is encountered, merging is attempted
650
   transitively on its definitions.  It returns true upon success
651
   and false upon failure.  */
652
 
653
static bool
654
combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
655
{
656
  rtx def_insn;
657
  bool merge_successful = true;
658
  int i;
659
  int defs_ix;
660
  bool outcome;
661
 
662
  VEC_truncate (rtx, state->defs_list, 0);
663
  VEC_truncate (rtx, state->copies_list, 0);
664
 
665
  outcome = make_defs_and_copies_lists (cand->insn, set_pat, state);
666
 
667
  if (!outcome)
668
    return false;
669
 
670
  merge_successful = true;
671
 
672
  /* Go through the defs vector and try to merge all the definitions
673
     in this vector.  */
674
  VEC_truncate (rtx, state->modified_list, 0);
675
  FOR_EACH_VEC_ELT (rtx, state->defs_list, defs_ix, def_insn)
676
    {
677
      if (merge_def_and_ext (cand, def_insn, state))
678
        VEC_safe_push (rtx, heap, state->modified_list, def_insn);
679
      else
680
        {
681
          merge_successful = false;
682
          break;
683
        }
684
    }
685
 
686
  /* Now go through the conditional copies vector and try to merge all
687
     the copies in this vector.  */
688
  if (merge_successful)
689
    {
690
      FOR_EACH_VEC_ELT (rtx, state->copies_list, i, def_insn)
691
        {
692
          if (transform_ifelse (cand, def_insn))
693
            VEC_safe_push (rtx, heap, state->modified_list, def_insn);
694
          else
695
            {
696
              merge_successful = false;
697
              break;
698
            }
699
        }
700
    }
701
 
702
  if (merge_successful)
703
    {
704
      /* Commit the changes here if possible
705
         FIXME: It's an all-or-nothing scenario.  Even if only one definition
706
         cannot be merged, we entirely give up.  In the future, we should allow
707
         extensions to be partially eliminated along those paths where the
708
         definitions could be merged.  */
709
      if (apply_change_group ())
710
        {
711
          if (dump_file)
712
            fprintf (dump_file, "All merges were successful.\n");
713
 
714
          FOR_EACH_VEC_ELT (rtx, state->modified_list, i, def_insn)
715
            if (state->modified[INSN_UID (def_insn)].kind == EXT_MODIFIED_NONE)
716
              state->modified[INSN_UID (def_insn)].kind
717
                = (cand->code == ZERO_EXTEND
718
                   ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT);
719
 
720
          return true;
721
        }
722
      else
723
        {
724
          /* Changes need not be cancelled explicitly as apply_change_group
725
             does it.  Print list of definitions in the dump_file for debug
726
             purposes.  This extension cannot be deleted.  */
727
          if (dump_file)
728
            {
729
              fprintf (dump_file,
730
                       "Merge cancelled, non-mergeable definitions:\n");
731
              FOR_EACH_VEC_ELT (rtx, state->modified_list, i, def_insn)
732
                print_rtl_single (dump_file, def_insn);
733
            }
734
        }
735
    }
736
  else
737
    {
738
      /* Cancel any changes that have been made so far.  */
739
      cancel_changes (0);
740
    }
741
 
742
  return false;
743
}
744
 
745
/* Add an extension pattern that could be eliminated.  */
746
 
747
static void
748
add_removable_extension (const_rtx expr, rtx insn,
749
                         VEC (ext_cand, heap) **insn_list,
750
                         unsigned *def_map)
751
{
752
  enum rtx_code code;
753
  enum machine_mode mode;
754
  unsigned int idx;
755
  rtx src, dest;
756
 
757
  /* We are looking for SET (REG N) (ANY_EXTEND (REG N)).  */
758
  if (GET_CODE (expr) != SET)
759
    return;
760
 
761
  src = SET_SRC (expr);
762
  code = GET_CODE (src);
763
  dest = SET_DEST (expr);
764
  mode = GET_MODE (dest);
765
 
766
  if (REG_P (dest)
767
      && (code == SIGN_EXTEND || code == ZERO_EXTEND)
768
      && REG_P (XEXP (src, 0))
769
      && REGNO (dest) == REGNO (XEXP (src, 0)))
770
    {
771
      struct df_link *defs, *def;
772
      ext_cand *cand;
773
 
774
      /* First, make sure we can get all the reaching definitions.  */
775
      defs = get_defs (insn, XEXP (src, 0), NULL);
776
      if (!defs)
777
        {
778
          if (dump_file)
779
            {
780
              fprintf (dump_file, "Cannot eliminate extension:\n");
781
              print_rtl_single (dump_file, insn);
782
              fprintf (dump_file, " because of missing definition(s)\n");
783
            }
784
          return;
785
        }
786
 
787
      /* Second, make sure the reaching definitions don't feed another and
788
         different extension.  FIXME: this obviously can be improved.  */
789
      for (def = defs; def; def = def->next)
790
        if ((idx = def_map[INSN_UID(DF_REF_INSN (def->ref))])
791
            && (cand = VEC_index (ext_cand, *insn_list, idx - 1))
792
            && (cand->code != code || cand->mode != mode))
793
          {
794
            if (dump_file)
795
              {
796
                fprintf (dump_file, "Cannot eliminate extension:\n");
797
                print_rtl_single (dump_file, insn);
798
                fprintf (dump_file, " because of other extension\n");
799
              }
800
            return;
801
          }
802
 
803
      /* Then add the candidate to the list and insert the reaching definitions
804
         into the definition map.  */
805
      cand = VEC_safe_push (ext_cand, heap, *insn_list, NULL);
806
      cand->expr = expr;
807
      cand->code = code;
808
      cand->mode = mode;
809
      cand->insn = insn;
810
      idx = VEC_length (ext_cand, *insn_list);
811
 
812
      for (def = defs; def; def = def->next)
813
        def_map[INSN_UID(DF_REF_INSN (def->ref))] = idx;
814
    }
815
}
816
 
817
/* Traverse the instruction stream looking for extensions and return the
818
   list of candidates.  */
819
 
820
static VEC (ext_cand, heap)*
821
find_removable_extensions (void)
822
{
823
  VEC (ext_cand, heap) *insn_list = NULL;
824
  basic_block bb;
825
  rtx insn, set;
826
  unsigned *def_map = XCNEWVEC (unsigned, max_insn_uid);
827
 
828
  FOR_EACH_BB (bb)
829
    FOR_BB_INSNS (bb, insn)
830
      {
831
        if (!NONDEBUG_INSN_P (insn))
832
          continue;
833
 
834
        set = single_set (insn);
835
        if (set == NULL_RTX)
836
          continue;
837
        add_removable_extension (set, insn, &insn_list, def_map);
838
      }
839
 
840
  XDELETEVEC (def_map);
841
 
842
  return insn_list;
843
}
844
 
845
/* This is the main function that checks the insn stream for redundant
846
   extensions and tries to remove them if possible.  */
847
 
848
static void
849
find_and_remove_re (void)
850
{
851
  ext_cand *curr_cand;
852
  rtx curr_insn = NULL_RTX;
853
  int num_re_opportunities = 0, num_realized = 0, i;
854
  VEC (ext_cand, heap) *reinsn_list;
855
  VEC (rtx, heap) *reinsn_del_list;
856
  ext_state state;
857
 
858
  /* Construct DU chain to get all reaching definitions of each
859
     extension instruction.  */
860
  df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN);
861
  df_analyze ();
862
  df_set_flags (DF_DEFER_INSN_RESCAN);
863
 
864
  max_insn_uid = get_max_uid ();
865
  reinsn_del_list = NULL;
866
  reinsn_list = find_removable_extensions ();
867
  state.defs_list = NULL;
868
  state.copies_list = NULL;
869
  state.modified_list = NULL;
870
  state.work_list = NULL;
871
  if (VEC_empty (ext_cand, reinsn_list))
872
    state.modified = NULL;
873
  else
874
    state.modified = XCNEWVEC (struct ext_modified, max_insn_uid);
875
 
876
  FOR_EACH_VEC_ELT (ext_cand, reinsn_list, i, curr_cand)
877
    {
878
      num_re_opportunities++;
879
 
880
      /* Try to combine the extension with the definition.  */
881
      if (dump_file)
882
        {
883
          fprintf (dump_file, "Trying to eliminate extension:\n");
884
          print_rtl_single (dump_file, curr_cand->insn);
885
        }
886
 
887
      if (combine_reaching_defs (curr_cand, curr_cand->expr, &state))
888
        {
889
          if (dump_file)
890
            fprintf (dump_file, "Eliminated the extension.\n");
891
          num_realized++;
892
          VEC_safe_push (rtx, heap, reinsn_del_list, curr_cand->insn);
893
          state.modified[INSN_UID (curr_cand->insn)].deleted = 1;
894
        }
895
    }
896
 
897
  /* Delete all useless extensions here in one sweep.  */
898
  FOR_EACH_VEC_ELT (rtx, reinsn_del_list, i, curr_insn)
899
    delete_insn (curr_insn);
900
 
901
  VEC_free (ext_cand, heap, reinsn_list);
902
  VEC_free (rtx, heap, reinsn_del_list);
903
  VEC_free (rtx, heap, state.defs_list);
904
  VEC_free (rtx, heap, state.copies_list);
905
  VEC_free (rtx, heap, state.modified_list);
906
  VEC_free (rtx, heap, state.work_list);
907
  XDELETEVEC (state.modified);
908
 
909
  if (dump_file && num_re_opportunities > 0)
910
    fprintf (dump_file, "Elimination opportunities = %d realized = %d\n",
911
             num_re_opportunities, num_realized);
912
 
913
  df_finish_pass (false);
914
}
915
 
916
/* Find and remove redundant extensions.  */
917
 
918
static unsigned int
919
rest_of_handle_ree (void)
920
{
921
  timevar_push (TV_REE);
922
  find_and_remove_re ();
923
  timevar_pop (TV_REE);
924
  return 0;
925
}
926
 
927
/* Run REE pass when flag_ree is set at optimization level > 0.  */
928
 
929
static bool
930
gate_handle_ree (void)
931
{
932
  return (optimize > 0 && flag_ree);
933
}
934
 
935
struct rtl_opt_pass pass_ree =
936
{
937
 {
938
  RTL_PASS,
939
  "ree",                                /* name */
940
  gate_handle_ree,                      /* gate */
941
  rest_of_handle_ree,                   /* execute */
942
  NULL,                                 /* sub */
943
  NULL,                                 /* next */
944
  0,                                    /* static_pass_number */
945
  TV_REE,                               /* tv_id */
946
  0,                                    /* properties_required */
947
  0,                                    /* properties_provided */
948
  0,                                    /* properties_destroyed */
949
  0,                                    /* todo_flags_start */
950
  TODO_ggc_collect |
951
  TODO_verify_rtl_sharing,              /* todo_flags_finish */
952
 }
953
};

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.