OpenCores
URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [gcc-4.5.1/] [gcc/] [expmed.c] - Blame information for rev 308

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 280 jeremybenn
/* Medium-level subroutines: convert bit-field store and extract
2
   and shifts, multiplies and divides to rtl instructions.
3
   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4
   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
5
   Free Software Foundation, Inc.
6
 
7
This file is part of GCC.
8
 
9
GCC is free software; you can redistribute it and/or modify it under
10
the terms of the GNU General Public License as published by the Free
11
Software Foundation; either version 3, or (at your option) any later
12
version.
13
 
14
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15
WARRANTY; without even the implied warranty of MERCHANTABILITY or
16
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17
for more details.
18
 
19
You should have received a copy of the GNU General Public License
20
along with GCC; see the file COPYING3.  If not see
21
<http://www.gnu.org/licenses/>.  */
22
 
23
 
24
#include "config.h"
25
#include "system.h"
26
#include "coretypes.h"
27
#include "tm.h"
28
#include "toplev.h"
29
#include "rtl.h"
30
#include "tree.h"
31
#include "tm_p.h"
32
#include "flags.h"
33
#include "insn-config.h"
34
#include "expr.h"
35
#include "optabs.h"
36
#include "real.h"
37
#include "recog.h"
38
#include "langhooks.h"
39
#include "df.h"
40
#include "target.h"
41
 
42
static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
43
                                   unsigned HOST_WIDE_INT,
44
                                   unsigned HOST_WIDE_INT, rtx);
45
static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
46
                                   unsigned HOST_WIDE_INT, rtx);
47
static rtx extract_fixed_bit_field (enum machine_mode, rtx,
48
                                    unsigned HOST_WIDE_INT,
49
                                    unsigned HOST_WIDE_INT,
50
                                    unsigned HOST_WIDE_INT, rtx, int);
51
static rtx mask_rtx (enum machine_mode, int, int, int);
52
static rtx lshift_value (enum machine_mode, rtx, int, int);
53
static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
54
                                    unsigned HOST_WIDE_INT, int);
55
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
56
static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
57
static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
58
 
59
/* Test whether a value is zero of a power of two.  */
60
#define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
61
 
62
/* Nonzero means divides or modulus operations are relatively cheap for
63
   powers of two, so don't use branches; emit the operation instead.
64
   Usually, this will mean that the MD file will emit non-branch
65
   sequences.  */
66
 
67
static bool sdiv_pow2_cheap[2][NUM_MACHINE_MODES];
68
static bool smod_pow2_cheap[2][NUM_MACHINE_MODES];
69
 
70
#ifndef SLOW_UNALIGNED_ACCESS
71
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
72
#endif
73
 
74
/* For compilers that support multiple targets with different word sizes,
75
   MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
76
   is the H8/300(H) compiler.  */
77
 
78
#ifndef MAX_BITS_PER_WORD
79
#define MAX_BITS_PER_WORD BITS_PER_WORD
80
#endif
81
 
82
/* Reduce conditional compilation elsewhere.  */
83
#ifndef HAVE_insv
84
#define HAVE_insv       0
85
#define CODE_FOR_insv   CODE_FOR_nothing
86
#define gen_insv(a,b,c,d) NULL_RTX
87
#endif
88
#ifndef HAVE_extv
89
#define HAVE_extv       0
90
#define CODE_FOR_extv   CODE_FOR_nothing
91
#define gen_extv(a,b,c,d) NULL_RTX
92
#endif
93
#ifndef HAVE_extzv
94
#define HAVE_extzv      0
95
#define CODE_FOR_extzv  CODE_FOR_nothing
96
#define gen_extzv(a,b,c,d) NULL_RTX
97
#endif
98
 
99
/* Cost of various pieces of RTL.  Note that some of these are indexed by
100
   shift count and some by mode.  */
101
static int zero_cost[2];
102
static int add_cost[2][NUM_MACHINE_MODES];
103
static int neg_cost[2][NUM_MACHINE_MODES];
104
static int shift_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
105
static int shiftadd_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
106
static int shiftsub0_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
107
static int shiftsub1_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
108
static int mul_cost[2][NUM_MACHINE_MODES];
109
static int sdiv_cost[2][NUM_MACHINE_MODES];
110
static int udiv_cost[2][NUM_MACHINE_MODES];
111
static int mul_widen_cost[2][NUM_MACHINE_MODES];
112
static int mul_highpart_cost[2][NUM_MACHINE_MODES];
113
 
114
void
115
init_expmed (void)
116
{
117
  struct
118
  {
119
    struct rtx_def reg;         rtunion reg_fld[2];
120
    struct rtx_def plus;        rtunion plus_fld1;
121
    struct rtx_def neg;
122
    struct rtx_def mult;        rtunion mult_fld1;
123
    struct rtx_def sdiv;        rtunion sdiv_fld1;
124
    struct rtx_def udiv;        rtunion udiv_fld1;
125
    struct rtx_def zext;
126
    struct rtx_def sdiv_32;     rtunion sdiv_32_fld1;
127
    struct rtx_def smod_32;     rtunion smod_32_fld1;
128
    struct rtx_def wide_mult;   rtunion wide_mult_fld1;
129
    struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
130
    struct rtx_def wide_trunc;
131
    struct rtx_def shift;       rtunion shift_fld1;
132
    struct rtx_def shift_mult;  rtunion shift_mult_fld1;
133
    struct rtx_def shift_add;   rtunion shift_add_fld1;
134
    struct rtx_def shift_sub0;  rtunion shift_sub0_fld1;
135
    struct rtx_def shift_sub1;  rtunion shift_sub1_fld1;
136
  } all;
137
 
138
  rtx pow2[MAX_BITS_PER_WORD];
139
  rtx cint[MAX_BITS_PER_WORD];
140
  int m, n;
141
  enum machine_mode mode, wider_mode;
142
  int speed;
143
 
144
 
145
  for (m = 1; m < MAX_BITS_PER_WORD; m++)
146
    {
147
      pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
148
      cint[m] = GEN_INT (m);
149
    }
150
  memset (&all, 0, sizeof all);
151
 
152
  PUT_CODE (&all.reg, REG);
153
  /* Avoid using hard regs in ways which may be unsupported.  */
154
  SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
155
 
156
  PUT_CODE (&all.plus, PLUS);
157
  XEXP (&all.plus, 0) = &all.reg;
158
  XEXP (&all.plus, 1) = &all.reg;
159
 
160
  PUT_CODE (&all.neg, NEG);
161
  XEXP (&all.neg, 0) = &all.reg;
162
 
163
  PUT_CODE (&all.mult, MULT);
164
  XEXP (&all.mult, 0) = &all.reg;
165
  XEXP (&all.mult, 1) = &all.reg;
166
 
167
  PUT_CODE (&all.sdiv, DIV);
168
  XEXP (&all.sdiv, 0) = &all.reg;
169
  XEXP (&all.sdiv, 1) = &all.reg;
170
 
171
  PUT_CODE (&all.udiv, UDIV);
172
  XEXP (&all.udiv, 0) = &all.reg;
173
  XEXP (&all.udiv, 1) = &all.reg;
174
 
175
  PUT_CODE (&all.sdiv_32, DIV);
176
  XEXP (&all.sdiv_32, 0) = &all.reg;
177
  XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
178
 
179
  PUT_CODE (&all.smod_32, MOD);
180
  XEXP (&all.smod_32, 0) = &all.reg;
181
  XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
182
 
183
  PUT_CODE (&all.zext, ZERO_EXTEND);
184
  XEXP (&all.zext, 0) = &all.reg;
185
 
186
  PUT_CODE (&all.wide_mult, MULT);
187
  XEXP (&all.wide_mult, 0) = &all.zext;
188
  XEXP (&all.wide_mult, 1) = &all.zext;
189
 
190
  PUT_CODE (&all.wide_lshr, LSHIFTRT);
191
  XEXP (&all.wide_lshr, 0) = &all.wide_mult;
192
 
193
  PUT_CODE (&all.wide_trunc, TRUNCATE);
194
  XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
195
 
196
  PUT_CODE (&all.shift, ASHIFT);
197
  XEXP (&all.shift, 0) = &all.reg;
198
 
199
  PUT_CODE (&all.shift_mult, MULT);
200
  XEXP (&all.shift_mult, 0) = &all.reg;
201
 
202
  PUT_CODE (&all.shift_add, PLUS);
203
  XEXP (&all.shift_add, 0) = &all.shift_mult;
204
  XEXP (&all.shift_add, 1) = &all.reg;
205
 
206
  PUT_CODE (&all.shift_sub0, MINUS);
207
  XEXP (&all.shift_sub0, 0) = &all.shift_mult;
208
  XEXP (&all.shift_sub0, 1) = &all.reg;
209
 
210
  PUT_CODE (&all.shift_sub1, MINUS);
211
  XEXP (&all.shift_sub1, 0) = &all.reg;
212
  XEXP (&all.shift_sub1, 1) = &all.shift_mult;
213
 
214
  for (speed = 0; speed < 2; speed++)
215
    {
216
      crtl->maybe_hot_insn_p = speed;
217
      zero_cost[speed] = rtx_cost (const0_rtx, SET, speed);
218
 
219
      for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
220
           mode != VOIDmode;
221
           mode = GET_MODE_WIDER_MODE (mode))
222
        {
223
          PUT_MODE (&all.reg, mode);
224
          PUT_MODE (&all.plus, mode);
225
          PUT_MODE (&all.neg, mode);
226
          PUT_MODE (&all.mult, mode);
227
          PUT_MODE (&all.sdiv, mode);
228
          PUT_MODE (&all.udiv, mode);
229
          PUT_MODE (&all.sdiv_32, mode);
230
          PUT_MODE (&all.smod_32, mode);
231
          PUT_MODE (&all.wide_trunc, mode);
232
          PUT_MODE (&all.shift, mode);
233
          PUT_MODE (&all.shift_mult, mode);
234
          PUT_MODE (&all.shift_add, mode);
235
          PUT_MODE (&all.shift_sub0, mode);
236
          PUT_MODE (&all.shift_sub1, mode);
237
 
238
          add_cost[speed][mode] = rtx_cost (&all.plus, SET, speed);
239
          neg_cost[speed][mode] = rtx_cost (&all.neg, SET, speed);
240
          mul_cost[speed][mode] = rtx_cost (&all.mult, SET, speed);
241
          sdiv_cost[speed][mode] = rtx_cost (&all.sdiv, SET, speed);
242
          udiv_cost[speed][mode] = rtx_cost (&all.udiv, SET, speed);
243
 
244
          sdiv_pow2_cheap[speed][mode] = (rtx_cost (&all.sdiv_32, SET, speed)
245
                                          <= 2 * add_cost[speed][mode]);
246
          smod_pow2_cheap[speed][mode] = (rtx_cost (&all.smod_32, SET, speed)
247
                                          <= 4 * add_cost[speed][mode]);
248
 
249
          wider_mode = GET_MODE_WIDER_MODE (mode);
250
          if (wider_mode != VOIDmode)
251
            {
252
              PUT_MODE (&all.zext, wider_mode);
253
              PUT_MODE (&all.wide_mult, wider_mode);
254
              PUT_MODE (&all.wide_lshr, wider_mode);
255
              XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
256
 
257
              mul_widen_cost[speed][wider_mode]
258
                = rtx_cost (&all.wide_mult, SET, speed);
259
              mul_highpart_cost[speed][mode]
260
                = rtx_cost (&all.wide_trunc, SET, speed);
261
            }
262
 
263
          shift_cost[speed][mode][0] = 0;
264
          shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
265
            = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];
266
 
267
          n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
268
          for (m = 1; m < n; m++)
269
            {
270
              XEXP (&all.shift, 1) = cint[m];
271
              XEXP (&all.shift_mult, 1) = pow2[m];
272
 
273
              shift_cost[speed][mode][m] = rtx_cost (&all.shift, SET, speed);
274
              shiftadd_cost[speed][mode][m] = rtx_cost (&all.shift_add, SET, speed);
275
              shiftsub0_cost[speed][mode][m] = rtx_cost (&all.shift_sub0, SET, speed);
276
              shiftsub1_cost[speed][mode][m] = rtx_cost (&all.shift_sub1, SET, speed);
277
            }
278
        }
279
    }
280
  default_rtl_profile ();
281
}
282
 
283
/* Return an rtx representing minus the value of X.
284
   MODE is the intended mode of the result,
285
   useful if X is a CONST_INT.  */
286
 
287
rtx
288
negate_rtx (enum machine_mode mode, rtx x)
289
{
290
  rtx result = simplify_unary_operation (NEG, mode, x, mode);
291
 
292
  if (result == 0)
293
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
294
 
295
  return result;
296
}
297
 
298
/* Report on the availability of insv/extv/extzv and the desired mode
299
   of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
300
   is false; else the mode of the specified operand.  If OPNO is -1,
301
   all the caller cares about is whether the insn is available.  */
302
enum machine_mode
303
mode_for_extraction (enum extraction_pattern pattern, int opno)
304
{
305
  const struct insn_data *data;
306
 
307
  switch (pattern)
308
    {
309
    case EP_insv:
310
      if (HAVE_insv)
311
        {
312
          data = &insn_data[CODE_FOR_insv];
313
          break;
314
        }
315
      return MAX_MACHINE_MODE;
316
 
317
    case EP_extv:
318
      if (HAVE_extv)
319
        {
320
          data = &insn_data[CODE_FOR_extv];
321
          break;
322
        }
323
      return MAX_MACHINE_MODE;
324
 
325
    case EP_extzv:
326
      if (HAVE_extzv)
327
        {
328
          data = &insn_data[CODE_FOR_extzv];
329
          break;
330
        }
331
      return MAX_MACHINE_MODE;
332
 
333
    default:
334
      gcc_unreachable ();
335
    }
336
 
337
  if (opno == -1)
338
    return VOIDmode;
339
 
340
  /* Everyone who uses this function used to follow it with
341
     if (result == VOIDmode) result = word_mode; */
342
  if (data->operand[opno].mode == VOIDmode)
343
    return word_mode;
344
  return data->operand[opno].mode;
345
}
346
 
347
/* Return true if X, of mode MODE, matches the predicate for operand
348
   OPNO of instruction ICODE.  Allow volatile memories, regardless of
349
   the ambient volatile_ok setting.  */
350
 
351
static bool
352
check_predicate_volatile_ok (enum insn_code icode, int opno,
353
                             rtx x, enum machine_mode mode)
354
{
355
  bool save_volatile_ok, result;
356
 
357
  save_volatile_ok = volatile_ok;
358
  result = insn_data[(int) icode].operand[opno].predicate (x, mode);
359
  volatile_ok = save_volatile_ok;
360
  return result;
361
}
362
 
363
/* A subroutine of store_bit_field, with the same arguments.  Return true
364
   if the operation could be implemented.
365
 
366
   If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
367
   no other way of implementing the operation.  If FALLBACK_P is false,
368
   return false instead.  */
369
 
370
static bool
371
store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
372
                   unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
373
                   rtx value, bool fallback_p)
374
{
375
  unsigned int unit
376
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
377
  unsigned HOST_WIDE_INT offset, bitpos;
378
  rtx op0 = str_rtx;
379
  int byte_offset;
380
  rtx orig_value;
381
 
382
  enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
383
 
384
  while (GET_CODE (op0) == SUBREG)
385
    {
386
      /* The following line once was done only if WORDS_BIG_ENDIAN,
387
         but I think that is a mistake.  WORDS_BIG_ENDIAN is
388
         meaningful at a much higher level; when structures are copied
389
         between memory and regs, the higher-numbered regs
390
         always get higher addresses.  */
391
      int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
392
      int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
393
 
394
      byte_offset = 0;
395
 
396
      /* Paradoxical subregs need special handling on big endian machines.  */
397
      if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
398
        {
399
          int difference = inner_mode_size - outer_mode_size;
400
 
401
          if (WORDS_BIG_ENDIAN)
402
            byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
403
          if (BYTES_BIG_ENDIAN)
404
            byte_offset += difference % UNITS_PER_WORD;
405
        }
406
      else
407
        byte_offset = SUBREG_BYTE (op0);
408
 
409
      bitnum += byte_offset * BITS_PER_UNIT;
410
      op0 = SUBREG_REG (op0);
411
    }
412
 
413
  /* No action is needed if the target is a register and if the field
414
     lies completely outside that register.  This can occur if the source
415
     code contains an out-of-bounds access to a small array.  */
416
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
417
    return true;
418
 
419
  /* Use vec_set patterns for inserting parts of vectors whenever
420
     available.  */
421
  if (VECTOR_MODE_P (GET_MODE (op0))
422
      && !MEM_P (op0)
423
      && (optab_handler (vec_set_optab, GET_MODE (op0))->insn_code
424
          != CODE_FOR_nothing)
425
      && fieldmode == GET_MODE_INNER (GET_MODE (op0))
426
      && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
427
      && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
428
    {
429
      enum machine_mode outermode = GET_MODE (op0);
430
      enum machine_mode innermode = GET_MODE_INNER (outermode);
431
      int icode = (int) optab_handler (vec_set_optab, outermode)->insn_code;
432
      int pos = bitnum / GET_MODE_BITSIZE (innermode);
433
      rtx rtxpos = GEN_INT (pos);
434
      rtx src = value;
435
      rtx dest = op0;
436
      rtx pat, seq;
437
      enum machine_mode mode0 = insn_data[icode].operand[0].mode;
438
      enum machine_mode mode1 = insn_data[icode].operand[1].mode;
439
      enum machine_mode mode2 = insn_data[icode].operand[2].mode;
440
 
441
      start_sequence ();
442
 
443
      if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
444
        src = copy_to_mode_reg (mode1, src);
445
 
446
      if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
447
        rtxpos = copy_to_mode_reg (mode1, rtxpos);
448
 
449
      /* We could handle this, but we should always be called with a pseudo
450
         for our targets and all insns should take them as outputs.  */
451
      gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
452
                  && (*insn_data[icode].operand[1].predicate) (src, mode1)
453
                  && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
454
      pat = GEN_FCN (icode) (dest, src, rtxpos);
455
      seq = get_insns ();
456
      end_sequence ();
457
      if (pat)
458
        {
459
          emit_insn (seq);
460
          emit_insn (pat);
461
          return true;
462
        }
463
    }
464
 
465
  /* If the target is a register, overwriting the entire object, or storing
466
     a full-word or multi-word field can be done with just a SUBREG.
467
 
468
     If the target is memory, storing any naturally aligned field can be
469
     done with a simple store.  For targets that support fast unaligned
470
     memory, any naturally sized, unit aligned field can be done directly.  */
471
 
472
  offset = bitnum / unit;
473
  bitpos = bitnum % unit;
474
  byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
475
                + (offset * UNITS_PER_WORD);
476
 
477
  if (bitpos == 0
478
      && bitsize == GET_MODE_BITSIZE (fieldmode)
479
      && (!MEM_P (op0)
480
          ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
481
             || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
482
             && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
483
          : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
484
             || (offset * BITS_PER_UNIT % bitsize == 0
485
                 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
486
    {
487
      if (MEM_P (op0))
488
        op0 = adjust_address (op0, fieldmode, offset);
489
      else if (GET_MODE (op0) != fieldmode)
490
        op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
491
                                   byte_offset);
492
      emit_move_insn (op0, value);
493
      return true;
494
    }
495
 
496
  /* Make sure we are playing with integral modes.  Pun with subregs
497
     if we aren't.  This must come after the entire register case above,
498
     since that case is valid for any mode.  The following cases are only
499
     valid for integral modes.  */
500
  {
501
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
502
    if (imode != GET_MODE (op0))
503
      {
504
        if (MEM_P (op0))
505
          op0 = adjust_address (op0, imode, 0);
506
        else
507
          {
508
            gcc_assert (imode != BLKmode);
509
            op0 = gen_lowpart (imode, op0);
510
          }
511
      }
512
  }
513
 
514
  /* We may be accessing data outside the field, which means
515
     we can alias adjacent data.  */
516
  if (MEM_P (op0))
517
    {
518
      op0 = shallow_copy_rtx (op0);
519
      set_mem_alias_set (op0, 0);
520
      set_mem_expr (op0, 0);
521
    }
522
 
523
  /* If OP0 is a register, BITPOS must count within a word.
524
     But as we have it, it counts within whatever size OP0 now has.
525
     On a bigendian machine, these are not the same, so convert.  */
526
  if (BYTES_BIG_ENDIAN
527
      && !MEM_P (op0)
528
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
529
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
530
 
531
  /* Storing an lsb-aligned field in a register
532
     can be done with a movestrict instruction.  */
533
 
534
  if (!MEM_P (op0)
535
      && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
536
      && bitsize == GET_MODE_BITSIZE (fieldmode)
537
      && (optab_handler (movstrict_optab, fieldmode)->insn_code
538
          != CODE_FOR_nothing))
539
    {
540
      int icode = optab_handler (movstrict_optab, fieldmode)->insn_code;
541
      rtx insn;
542
      rtx start = get_last_insn ();
543
      rtx arg0 = op0;
544
 
545
      /* Get appropriate low part of the value being stored.  */
546
      if (CONST_INT_P (value) || REG_P (value))
547
        value = gen_lowpart (fieldmode, value);
548
      else if (!(GET_CODE (value) == SYMBOL_REF
549
                 || GET_CODE (value) == LABEL_REF
550
                 || GET_CODE (value) == CONST))
551
        value = convert_to_mode (fieldmode, value, 0);
552
 
553
      if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
554
        value = copy_to_mode_reg (fieldmode, value);
555
 
556
      if (GET_CODE (op0) == SUBREG)
557
        {
558
          /* Else we've got some float mode source being extracted into
559
             a different float mode destination -- this combination of
560
             subregs results in Severe Tire Damage.  */
561
          gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
562
                      || GET_MODE_CLASS (fieldmode) == MODE_INT
563
                      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
564
          arg0 = SUBREG_REG (op0);
565
        }
566
 
567
      insn = (GEN_FCN (icode)
568
                 (gen_rtx_SUBREG (fieldmode, arg0,
569
                                  (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
570
                                  + (offset * UNITS_PER_WORD)),
571
                                  value));
572
      if (insn)
573
        {
574
          emit_insn (insn);
575
          return true;
576
        }
577
      delete_insns_since (start);
578
    }
579
 
580
  /* Handle fields bigger than a word.  */
581
 
582
  if (bitsize > BITS_PER_WORD)
583
    {
584
      /* Here we transfer the words of the field
585
         in the order least significant first.
586
         This is because the most significant word is the one which may
587
         be less than full.
588
         However, only do that if the value is not BLKmode.  */
589
 
590
      unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
591
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
592
      unsigned int i;
593
      rtx last;
594
 
595
      /* This is the mode we must force value to, so that there will be enough
596
         subwords to extract.  Note that fieldmode will often (always?) be
597
         VOIDmode, because that is what store_field uses to indicate that this
598
         is a bit field, but passing VOIDmode to operand_subword_force
599
         is not allowed.  */
600
      fieldmode = GET_MODE (value);
601
      if (fieldmode == VOIDmode)
602
        fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
603
 
604
      last = get_last_insn ();
605
      for (i = 0; i < nwords; i++)
606
        {
607
          /* If I is 0, use the low-order word in both field and target;
608
             if I is 1, use the next to lowest word; and so on.  */
609
          unsigned int wordnum = (backwards ? nwords - i - 1 : i);
610
          unsigned int bit_offset = (backwards
611
                                     ? MAX ((int) bitsize - ((int) i + 1)
612
                                            * BITS_PER_WORD,
613
                                            0)
614
                                     : (int) i * BITS_PER_WORD);
615
          rtx value_word = operand_subword_force (value, wordnum, fieldmode);
616
 
617
          if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD,
618
                                            bitsize - i * BITS_PER_WORD),
619
                                  bitnum + bit_offset, word_mode,
620
                                  value_word, fallback_p))
621
            {
622
              delete_insns_since (last);
623
              return false;
624
            }
625
        }
626
      return true;
627
    }
628
 
629
  /* From here on we can assume that the field to be stored in is
630
     a full-word (whatever type that is), since it is shorter than a word.  */
631
 
632
  /* OFFSET is the number of words or bytes (UNIT says which)
633
     from STR_RTX to the first word or byte containing part of the field.  */
634
 
635
  if (!MEM_P (op0))
636
    {
637
      if (offset != 0
638
          || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
639
        {
640
          if (!REG_P (op0))
641
            {
642
              /* Since this is a destination (lvalue), we can't copy
643
                 it to a pseudo.  We can remove a SUBREG that does not
644
                 change the size of the operand.  Such a SUBREG may
645
                 have been added above.  */
646
              gcc_assert (GET_CODE (op0) == SUBREG
647
                          && (GET_MODE_SIZE (GET_MODE (op0))
648
                              == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
649
              op0 = SUBREG_REG (op0);
650
            }
651
          op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
652
                                op0, (offset * UNITS_PER_WORD));
653
        }
654
      offset = 0;
655
    }
656
 
657
  /* If VALUE has a floating-point or complex mode, access it as an
658
     integer of the corresponding size.  This can occur on a machine
659
     with 64 bit registers that uses SFmode for float.  It can also
660
     occur for unaligned float or complex fields.  */
661
  orig_value = value;
662
  if (GET_MODE (value) != VOIDmode
663
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
664
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
665
    {
666
      value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
667
      emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
668
    }
669
 
670
  /* Now OFFSET is nonzero only if OP0 is memory
671
     and is therefore always measured in bytes.  */
672
 
673
  if (HAVE_insv
674
      && GET_MODE (value) != BLKmode
675
      && bitsize > 0
676
      && GET_MODE_BITSIZE (op_mode) >= bitsize
677
      && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
678
            && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
679
      && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
680
                                                        VOIDmode)
681
      && check_predicate_volatile_ok (CODE_FOR_insv, 0, op0, VOIDmode))
682
    {
683
      int xbitpos = bitpos;
684
      rtx value1;
685
      rtx xop0 = op0;
686
      rtx last = get_last_insn ();
687
      rtx pat;
688
      bool copy_back = false;
689
 
690
      /* Add OFFSET into OP0's address.  */
691
      if (MEM_P (xop0))
692
        xop0 = adjust_address (xop0, byte_mode, offset);
693
 
694
      /* If xop0 is a register, we need it in OP_MODE
695
         to make it acceptable to the format of insv.  */
696
      if (GET_CODE (xop0) == SUBREG)
697
        /* We can't just change the mode, because this might clobber op0,
698
           and we will need the original value of op0 if insv fails.  */
699
        xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
700
      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
701
        xop0 = gen_lowpart_SUBREG (op_mode, xop0);
702
 
703
      /* If the destination is a paradoxical subreg such that we need a
704
         truncate to the inner mode, perform the insertion on a temporary and
705
         truncate the result to the original destination.  Note that we can't
706
         just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
707
         X) 0)) is (reg:N X).  */
708
      if (GET_CODE (xop0) == SUBREG
709
          && REG_P (SUBREG_REG (xop0))
710
          && (!TRULY_NOOP_TRUNCATION
711
              (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (xop0))),
712
               GET_MODE_BITSIZE (op_mode))))
713
        {
714
          rtx tem = gen_reg_rtx (op_mode);
715
          emit_move_insn (tem, xop0);
716
          xop0 = tem;
717
          copy_back = true;
718
        }
719
 
720
      /* On big-endian machines, we count bits from the most significant.
721
         If the bit field insn does not, we must invert.  */
722
 
723
      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
724
        xbitpos = unit - bitsize - xbitpos;
725
 
726
      /* We have been counting XBITPOS within UNIT.
727
         Count instead within the size of the register.  */
728
      if (BITS_BIG_ENDIAN && !MEM_P (xop0))
729
        xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
730
 
731
      unit = GET_MODE_BITSIZE (op_mode);
732
 
733
      /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
734
      value1 = value;
735
      if (GET_MODE (value) != op_mode)
736
        {
737
          if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
738
            {
739
              /* Optimization: Don't bother really extending VALUE
740
                 if it has all the bits we will actually use.  However,
741
                 if we must narrow it, be sure we do it correctly.  */
742
 
743
              if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
744
                {
745
                  rtx tmp;
746
 
747
                  tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
748
                  if (! tmp)
749
                    tmp = simplify_gen_subreg (op_mode,
750
                                               force_reg (GET_MODE (value),
751
                                                          value1),
752
                                               GET_MODE (value), 0);
753
                  value1 = tmp;
754
                }
755
              else
756
                value1 = gen_lowpart (op_mode, value1);
757
            }
758
          else if (CONST_INT_P (value))
759
            value1 = gen_int_mode (INTVAL (value), op_mode);
760
          else
761
            /* Parse phase is supposed to make VALUE's data type
762
               match that of the component reference, which is a type
763
               at least as wide as the field; so VALUE should have
764
               a mode that corresponds to that type.  */
765
            gcc_assert (CONSTANT_P (value));
766
        }
767
 
768
      /* If this machine's insv insists on a register,
769
         get VALUE1 into a register.  */
770
      if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
771
             (value1, op_mode)))
772
        value1 = force_reg (op_mode, value1);
773
 
774
      pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
775
      if (pat)
776
        {
777
          emit_insn (pat);
778
 
779
          if (copy_back)
780
            convert_move (op0, xop0, true);
781
          return true;
782
        }
783
      delete_insns_since (last);
784
    }
785
 
786
  /* If OP0 is a memory, try copying it to a register and seeing if a
787
     cheap register alternative is available.  */
788
  if (HAVE_insv && MEM_P (op0))
789
    {
790
      enum machine_mode bestmode;
791
 
792
      /* Get the mode to use for inserting into this field.  If OP0 is
793
         BLKmode, get the smallest mode consistent with the alignment. If
794
         OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
795
         mode. Otherwise, use the smallest mode containing the field.  */
796
 
797
      if (GET_MODE (op0) == BLKmode
798
          || (op_mode != MAX_MACHINE_MODE
799
              && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
800
        bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
801
                                  (op_mode == MAX_MACHINE_MODE
802
                                   ? VOIDmode : op_mode),
803
                                  MEM_VOLATILE_P (op0));
804
      else
805
        bestmode = GET_MODE (op0);
806
 
807
      if (bestmode != VOIDmode
808
          && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
809
          && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
810
               && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
811
        {
812
          rtx last, tempreg, xop0;
813
          unsigned HOST_WIDE_INT xoffset, xbitpos;
814
 
815
          last = get_last_insn ();
816
 
817
          /* Adjust address to point to the containing unit of
818
             that mode.  Compute the offset as a multiple of this unit,
819
             counting in bytes.  */
820
          unit = GET_MODE_BITSIZE (bestmode);
821
          xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
822
          xbitpos = bitnum % unit;
823
          xop0 = adjust_address (op0, bestmode, xoffset);
824
 
825
          /* Fetch that unit, store the bitfield in it, then store
826
             the unit.  */
827
          tempreg = copy_to_reg (xop0);
828
          if (store_bit_field_1 (tempreg, bitsize, xbitpos,
829
                                 fieldmode, orig_value, false))
830
            {
831
              emit_move_insn (xop0, tempreg);
832
              return true;
833
            }
834
          delete_insns_since (last);
835
        }
836
    }
837
 
838
  if (!fallback_p)
839
    return false;
840
 
841
  store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
842
  return true;
843
}
844
 
845
/* Generate code to store value from rtx VALUE
846
   into a bit-field within structure STR_RTX
847
   containing BITSIZE bits starting at bit BITNUM.
848
   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */
849
 
850
void
851
store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
852
                 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
853
                 rtx value)
854
{
855
  if (!store_bit_field_1 (str_rtx, bitsize, bitnum, fieldmode, value, true))
856
    gcc_unreachable ();
857
}
858
 
859
/* Use shifts and boolean operations to store VALUE
860
   into a bit field of width BITSIZE
861
   in a memory location specified by OP0 except offset by OFFSET bytes.
862
     (OFFSET must be 0 if OP0 is a register.)
863
   The field starts at position BITPOS within the byte.
864
    (If OP0 is a register, it may be a full word or a narrower mode,
865
     but BITPOS still counts within a full word,
866
     which is significant on bigendian machines.)  */
867
 
868
static void
869
store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
870
                       unsigned HOST_WIDE_INT bitsize,
871
                       unsigned HOST_WIDE_INT bitpos, rtx value)
872
{
873
  enum machine_mode mode;
874
  unsigned int total_bits = BITS_PER_WORD;
875
  rtx temp;
876
  int all_zero = 0;
877
  int all_one = 0;
878
 
879
  /* There is a case not handled here:
880
     a structure with a known alignment of just a halfword
881
     and a field split across two aligned halfwords within the structure.
882
     Or likewise a structure with a known alignment of just a byte
883
     and a field split across two bytes.
884
     Such cases are not supposed to be able to occur.  */
885
 
886
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
887
    {
888
      gcc_assert (!offset);
889
      /* Special treatment for a bit field split across two registers.  */
890
      if (bitsize + bitpos > BITS_PER_WORD)
891
        {
892
          store_split_bit_field (op0, bitsize, bitpos, value);
893
          return;
894
        }
895
    }
896
  else
897
    {
898
      /* Get the proper mode to use for this field.  We want a mode that
899
         includes the entire field.  If such a mode would be larger than
900
         a word, we won't be doing the extraction the normal way.
901
         We don't want a mode bigger than the destination.  */
902
 
903
      mode = GET_MODE (op0);
904
      if (GET_MODE_BITSIZE (mode) == 0
905
          || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
906
        mode = word_mode;
907
      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
908
                            MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
909
 
910
      if (mode == VOIDmode)
911
        {
912
          /* The only way this should occur is if the field spans word
913
             boundaries.  */
914
          store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
915
                                 value);
916
          return;
917
        }
918
 
919
      total_bits = GET_MODE_BITSIZE (mode);
920
 
921
      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
922
         be in the range 0 to total_bits-1, and put any excess bytes in
923
         OFFSET.  */
924
      if (bitpos >= total_bits)
925
        {
926
          offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
927
          bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
928
                     * BITS_PER_UNIT);
929
        }
930
 
931
      /* Get ref to an aligned byte, halfword, or word containing the field.
932
         Adjust BITPOS to be position within a word,
933
         and OFFSET to be the offset of that word.
934
         Then alter OP0 to refer to that word.  */
935
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
936
      offset -= (offset % (total_bits / BITS_PER_UNIT));
937
      op0 = adjust_address (op0, mode, offset);
938
    }
939
 
940
  mode = GET_MODE (op0);
941
 
942
  /* Now MODE is either some integral mode for a MEM as OP0,
943
     or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
944
     The bit field is contained entirely within OP0.
945
     BITPOS is the starting bit number within OP0.
946
     (OP0's mode may actually be narrower than MODE.)  */
947
 
948
  if (BYTES_BIG_ENDIAN)
949
      /* BITPOS is the distance between our msb
950
         and that of the containing datum.
951
         Convert it to the distance from the lsb.  */
952
      bitpos = total_bits - bitsize - bitpos;
953
 
954
  /* Now BITPOS is always the distance between our lsb
955
     and that of OP0.  */
956
 
957
  /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
958
     we must first convert its mode to MODE.  */
959
 
960
  if (CONST_INT_P (value))
961
    {
962
      HOST_WIDE_INT v = INTVAL (value);
963
 
964
      if (bitsize < HOST_BITS_PER_WIDE_INT)
965
        v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
966
 
967
      if (v == 0)
968
        all_zero = 1;
969
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
970
                && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
971
               || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
972
        all_one = 1;
973
 
974
      value = lshift_value (mode, value, bitpos, bitsize);
975
    }
976
  else
977
    {
978
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
979
                      && bitpos + bitsize != GET_MODE_BITSIZE (mode));
980
 
981
      if (GET_MODE (value) != mode)
982
        value = convert_to_mode (mode, value, 1);
983
 
984
      if (must_and)
985
        value = expand_binop (mode, and_optab, value,
986
                              mask_rtx (mode, 0, bitsize, 0),
987
                              NULL_RTX, 1, OPTAB_LIB_WIDEN);
988
      if (bitpos > 0)
989
        value = expand_shift (LSHIFT_EXPR, mode, value,
990
                              build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
991
    }
992
 
993
  /* Now clear the chosen bits in OP0,
994
     except that if VALUE is -1 we need not bother.  */
995
  /* We keep the intermediates in registers to allow CSE to combine
996
     consecutive bitfield assignments.  */
997
 
998
  temp = force_reg (mode, op0);
999
 
1000
  if (! all_one)
1001
    {
1002
      temp = expand_binop (mode, and_optab, temp,
1003
                           mask_rtx (mode, bitpos, bitsize, 1),
1004
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
1005
      temp = force_reg (mode, temp);
1006
    }
1007
 
1008
  /* Now logical-or VALUE into OP0, unless it is zero.  */
1009
 
1010
  if (! all_zero)
1011
    {
1012
      temp = expand_binop (mode, ior_optab, temp, value,
1013
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
1014
      temp = force_reg (mode, temp);
1015
    }
1016
 
1017
  if (op0 != temp)
1018
    {
1019
      op0 = copy_rtx (op0);
1020
      emit_move_insn (op0, temp);
1021
    }
1022
}
1023
 
1024
/* Store a bit field that is split across multiple accessible memory objects.
1025
 
1026
   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1027
   BITSIZE is the field width; BITPOS the position of its first bit
1028
   (within the word).
1029
   VALUE is the value to store.
1030
 
1031
   This does not yet handle fields wider than BITS_PER_WORD.  */
1032
 
1033
static void
1034
store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1035
                       unsigned HOST_WIDE_INT bitpos, rtx value)
1036
{
1037
  unsigned int unit;
1038
  unsigned int bitsdone = 0;
1039
 
1040
  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1041
     much at a time.  */
1042
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1043
    unit = BITS_PER_WORD;
1044
  else
1045
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1046
 
1047
  /* If VALUE is a constant other than a CONST_INT, get it into a register in
1048
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1049
     that VALUE might be a floating-point constant.  */
1050
  if (CONSTANT_P (value) && !CONST_INT_P (value))
1051
    {
1052
      rtx word = gen_lowpart_common (word_mode, value);
1053
 
1054
      if (word && (value != word))
1055
        value = word;
1056
      else
1057
        value = gen_lowpart_common (word_mode,
1058
                                    force_reg (GET_MODE (value) != VOIDmode
1059
                                               ? GET_MODE (value)
1060
                                               : word_mode, value));
1061
    }
1062
 
1063
  while (bitsdone < bitsize)
1064
    {
1065
      unsigned HOST_WIDE_INT thissize;
1066
      rtx part, word;
1067
      unsigned HOST_WIDE_INT thispos;
1068
      unsigned HOST_WIDE_INT offset;
1069
 
1070
      offset = (bitpos + bitsdone) / unit;
1071
      thispos = (bitpos + bitsdone) % unit;
1072
 
1073
      /* THISSIZE must not overrun a word boundary.  Otherwise,
1074
         store_fixed_bit_field will call us again, and we will mutually
1075
         recurse forever.  */
1076
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1077
      thissize = MIN (thissize, unit - thispos);
1078
 
1079
      if (BYTES_BIG_ENDIAN)
1080
        {
1081
          int total_bits;
1082
 
1083
          /* We must do an endian conversion exactly the same way as it is
1084
             done in extract_bit_field, so that the two calls to
1085
             extract_fixed_bit_field will have comparable arguments.  */
1086
          if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1087
            total_bits = BITS_PER_WORD;
1088
          else
1089
            total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1090
 
1091
          /* Fetch successively less significant portions.  */
1092
          if (CONST_INT_P (value))
1093
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1094
                             >> (bitsize - bitsdone - thissize))
1095
                            & (((HOST_WIDE_INT) 1 << thissize) - 1));
1096
          else
1097
            /* The args are chosen so that the last part includes the
1098
               lsb.  Give extract_bit_field the value it needs (with
1099
               endianness compensation) to fetch the piece we want.  */
1100
            part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1101
                                            total_bits - bitsize + bitsdone,
1102
                                            NULL_RTX, 1);
1103
        }
1104
      else
1105
        {
1106
          /* Fetch successively more significant portions.  */
1107
          if (CONST_INT_P (value))
1108
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1109
                             >> bitsdone)
1110
                            & (((HOST_WIDE_INT) 1 << thissize) - 1));
1111
          else
1112
            part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1113
                                            bitsdone, NULL_RTX, 1);
1114
        }
1115
 
1116
      /* If OP0 is a register, then handle OFFSET here.
1117
 
1118
         When handling multiword bitfields, extract_bit_field may pass
1119
         down a word_mode SUBREG of a larger REG for a bitfield that actually
1120
         crosses a word boundary.  Thus, for a SUBREG, we must find
1121
         the current word starting from the base register.  */
1122
      if (GET_CODE (op0) == SUBREG)
1123
        {
1124
          int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1125
          word = operand_subword_force (SUBREG_REG (op0), word_offset,
1126
                                        GET_MODE (SUBREG_REG (op0)));
1127
          offset = 0;
1128
        }
1129
      else if (REG_P (op0))
1130
        {
1131
          word = operand_subword_force (op0, offset, GET_MODE (op0));
1132
          offset = 0;
1133
        }
1134
      else
1135
        word = op0;
1136
 
1137
      /* OFFSET is in UNITs, and UNIT is in bits.
1138
         store_fixed_bit_field wants offset in bytes.  */
1139
      store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1140
                             thispos, part);
1141
      bitsdone += thissize;
1142
    }
1143
}
1144
 
1145
/* A subroutine of extract_bit_field_1 that converts return value X
1146
   to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1147
   to extract_bit_field.  */
1148
 
1149
static rtx
1150
convert_extracted_bit_field (rtx x, enum machine_mode mode,
1151
                             enum machine_mode tmode, bool unsignedp)
1152
{
1153
  if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1154
    return x;
1155
 
1156
  /* If the x mode is not a scalar integral, first convert to the
1157
     integer mode of that size and then access it as a floating-point
1158
     value via a SUBREG.  */
1159
  if (!SCALAR_INT_MODE_P (tmode))
1160
    {
1161
      enum machine_mode smode;
1162
 
1163
      smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1164
      x = convert_to_mode (smode, x, unsignedp);
1165
      x = force_reg (smode, x);
1166
      return gen_lowpart (tmode, x);
1167
    }
1168
 
1169
  return convert_to_mode (tmode, x, unsignedp);
1170
}
1171
 
1172
/* A subroutine of extract_bit_field, with the same arguments.
1173
   If FALLBACK_P is true, fall back to extract_fixed_bit_field
1174
   if we can find no other means of implementing the operation.
1175
   if FALLBACK_P is false, return NULL instead.  */
1176
 
1177
static rtx
1178
extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1179
                     unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1180
                     enum machine_mode mode, enum machine_mode tmode,
1181
                     bool fallback_p)
1182
{
1183
  unsigned int unit
1184
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1185
  unsigned HOST_WIDE_INT offset, bitpos;
1186
  rtx op0 = str_rtx;
1187
  enum machine_mode int_mode;
1188
  enum machine_mode ext_mode;
1189
  enum machine_mode mode1;
1190
  enum insn_code icode;
1191
  int byte_offset;
1192
 
1193
  if (tmode == VOIDmode)
1194
    tmode = mode;
1195
 
1196
  while (GET_CODE (op0) == SUBREG)
1197
    {
1198
      bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1199
      op0 = SUBREG_REG (op0);
1200
    }
1201
 
1202
  /* If we have an out-of-bounds access to a register, just return an
1203
     uninitialized register of the required mode.  This can occur if the
1204
     source code contains an out-of-bounds access to a small array.  */
1205
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1206
    return gen_reg_rtx (tmode);
1207
 
1208
  if (REG_P (op0)
1209
      && mode == GET_MODE (op0)
1210
      && bitnum == 0
1211
      && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1212
    {
1213
      /* We're trying to extract a full register from itself.  */
1214
      return op0;
1215
    }
1216
 
1217
  /* See if we can get a better vector mode before extracting.  */
1218
  if (VECTOR_MODE_P (GET_MODE (op0))
1219
      && !MEM_P (op0)
1220
      && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1221
    {
1222
      enum machine_mode new_mode;
1223
      int nunits = GET_MODE_NUNITS (GET_MODE (op0));
1224
 
1225
      if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1226
        new_mode = MIN_MODE_VECTOR_FLOAT;
1227
      else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1228
        new_mode = MIN_MODE_VECTOR_FRACT;
1229
      else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1230
        new_mode = MIN_MODE_VECTOR_UFRACT;
1231
      else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1232
        new_mode = MIN_MODE_VECTOR_ACCUM;
1233
      else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1234
        new_mode = MIN_MODE_VECTOR_UACCUM;
1235
      else
1236
        new_mode = MIN_MODE_VECTOR_INT;
1237
 
1238
      for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1239
        if (GET_MODE_NUNITS (new_mode) == nunits
1240
            && GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1241
            && targetm.vector_mode_supported_p (new_mode))
1242
          break;
1243
      if (new_mode != VOIDmode)
1244
        op0 = gen_lowpart (new_mode, op0);
1245
    }
1246
 
1247
  /* Use vec_extract patterns for extracting parts of vectors whenever
1248
     available.  */
1249
  if (VECTOR_MODE_P (GET_MODE (op0))
1250
      && !MEM_P (op0)
1251
      && (optab_handler (vec_extract_optab, GET_MODE (op0))->insn_code
1252
          != CODE_FOR_nothing)
1253
      && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1254
          == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1255
    {
1256
      enum machine_mode outermode = GET_MODE (op0);
1257
      enum machine_mode innermode = GET_MODE_INNER (outermode);
1258
      int icode = (int) optab_handler (vec_extract_optab, outermode)->insn_code;
1259
      unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1260
      rtx rtxpos = GEN_INT (pos);
1261
      rtx src = op0;
1262
      rtx dest = NULL, pat, seq;
1263
      enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1264
      enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1265
      enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1266
 
1267
      if (innermode == tmode || innermode == mode)
1268
        dest = target;
1269
 
1270
      if (!dest)
1271
        dest = gen_reg_rtx (innermode);
1272
 
1273
      start_sequence ();
1274
 
1275
      if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1276
        dest = copy_to_mode_reg (mode0, dest);
1277
 
1278
      if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1279
        src = copy_to_mode_reg (mode1, src);
1280
 
1281
      if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1282
        rtxpos = copy_to_mode_reg (mode1, rtxpos);
1283
 
1284
      /* We could handle this, but we should always be called with a pseudo
1285
         for our targets and all insns should take them as outputs.  */
1286
      gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1287
                  && (*insn_data[icode].operand[1].predicate) (src, mode1)
1288
                  && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1289
 
1290
      pat = GEN_FCN (icode) (dest, src, rtxpos);
1291
      seq = get_insns ();
1292
      end_sequence ();
1293
      if (pat)
1294
        {
1295
          emit_insn (seq);
1296
          emit_insn (pat);
1297
          if (mode0 != mode)
1298
            return gen_lowpart (tmode, dest);
1299
          return dest;
1300
        }
1301
    }
1302
 
1303
  /* Make sure we are playing with integral modes.  Pun with subregs
1304
     if we aren't.  */
1305
  {
1306
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1307
    if (imode != GET_MODE (op0))
1308
      {
1309
        if (MEM_P (op0))
1310
          op0 = adjust_address (op0, imode, 0);
1311
        else if (imode != BLKmode)
1312
          {
1313
            op0 = gen_lowpart (imode, op0);
1314
 
1315
            /* If we got a SUBREG, force it into a register since we
1316
               aren't going to be able to do another SUBREG on it.  */
1317
            if (GET_CODE (op0) == SUBREG)
1318
              op0 = force_reg (imode, op0);
1319
          }
1320
        else if (REG_P (op0))
1321
          {
1322
            rtx reg, subreg;
1323
            imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1324
                                            MODE_INT);
1325
            reg = gen_reg_rtx (imode);
1326
            subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1327
            emit_move_insn (subreg, op0);
1328
            op0 = reg;
1329
            bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1330
          }
1331
        else
1332
          {
1333
            rtx mem = assign_stack_temp (GET_MODE (op0),
1334
                                         GET_MODE_SIZE (GET_MODE (op0)), 0);
1335
            emit_move_insn (mem, op0);
1336
            op0 = adjust_address (mem, BLKmode, 0);
1337
          }
1338
      }
1339
  }
1340
 
1341
  /* We may be accessing data outside the field, which means
1342
     we can alias adjacent data.  */
1343
  if (MEM_P (op0))
1344
    {
1345
      op0 = shallow_copy_rtx (op0);
1346
      set_mem_alias_set (op0, 0);
1347
      set_mem_expr (op0, 0);
1348
    }
1349
 
1350
  /* Extraction of a full-word or multi-word value from a structure
1351
     in a register or aligned memory can be done with just a SUBREG.
1352
     A subword value in the least significant part of a register
1353
     can also be extracted with a SUBREG.  For this, we need the
1354
     byte offset of the value in op0.  */
1355
 
1356
  bitpos = bitnum % unit;
1357
  offset = bitnum / unit;
1358
  byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1359
 
1360
  /* If OP0 is a register, BITPOS must count within a word.
1361
     But as we have it, it counts within whatever size OP0 now has.
1362
     On a bigendian machine, these are not the same, so convert.  */
1363
  if (BYTES_BIG_ENDIAN
1364
      && !MEM_P (op0)
1365
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1366
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1367
 
1368
  /* ??? We currently assume TARGET is at least as big as BITSIZE.
1369
     If that's wrong, the solution is to test for it and set TARGET to 0
1370
     if needed.  */
1371
 
1372
  /* Only scalar integer modes can be converted via subregs.  There is an
1373
     additional problem for FP modes here in that they can have a precision
1374
     which is different from the size.  mode_for_size uses precision, but
1375
     we want a mode based on the size, so we must avoid calling it for FP
1376
     modes.  */
1377
  mode1  = (SCALAR_INT_MODE_P (tmode)
1378
            ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1379
            : mode);
1380
 
1381
  if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1382
        && bitpos % BITS_PER_WORD == 0)
1383
       || (mode1 != BLKmode
1384
           /* ??? The big endian test here is wrong.  This is correct
1385
              if the value is in a register, and if mode_for_size is not
1386
              the same mode as op0.  This causes us to get unnecessarily
1387
              inefficient code from the Thumb port when -mbig-endian.  */
1388
           && (BYTES_BIG_ENDIAN
1389
               ? bitpos + bitsize == BITS_PER_WORD
1390
               : bitpos == 0)))
1391
      && ((!MEM_P (op0)
1392
           && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode1),
1393
                                     GET_MODE_BITSIZE (GET_MODE (op0)))
1394
           && GET_MODE_SIZE (mode1) != 0
1395
           && byte_offset % GET_MODE_SIZE (mode1) == 0)
1396
          || (MEM_P (op0)
1397
              && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1398
                  || (offset * BITS_PER_UNIT % bitsize == 0
1399
                      && MEM_ALIGN (op0) % bitsize == 0)))))
1400
    {
1401
      if (MEM_P (op0))
1402
        op0 = adjust_address (op0, mode1, offset);
1403
      else if (mode1 != GET_MODE (op0))
1404
        {
1405
          rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1406
                                         byte_offset);
1407
          if (sub == NULL)
1408
            goto no_subreg_mode_swap;
1409
          op0 = sub;
1410
        }
1411
      if (mode1 != mode)
1412
        return convert_to_mode (tmode, op0, unsignedp);
1413
      return op0;
1414
    }
1415
 no_subreg_mode_swap:
1416
 
1417
  /* Handle fields bigger than a word.  */
1418
 
1419
  if (bitsize > BITS_PER_WORD)
1420
    {
1421
      /* Here we transfer the words of the field
1422
         in the order least significant first.
1423
         This is because the most significant word is the one which may
1424
         be less than full.  */
1425
 
1426
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1427
      unsigned int i;
1428
 
1429
      if (target == 0 || !REG_P (target))
1430
        target = gen_reg_rtx (mode);
1431
 
1432
      /* Indicate for flow that the entire target reg is being set.  */
1433
      emit_clobber (target);
1434
 
1435
      for (i = 0; i < nwords; i++)
1436
        {
1437
          /* If I is 0, use the low-order word in both field and target;
1438
             if I is 1, use the next to lowest word; and so on.  */
1439
          /* Word number in TARGET to use.  */
1440
          unsigned int wordnum
1441
            = (WORDS_BIG_ENDIAN
1442
               ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1443
               : i);
1444
          /* Offset from start of field in OP0.  */
1445
          unsigned int bit_offset = (WORDS_BIG_ENDIAN
1446
                                     ? MAX (0, ((int) bitsize - ((int) i + 1)
1447
                                                * (int) BITS_PER_WORD))
1448
                                     : (int) i * BITS_PER_WORD);
1449
          rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1450
          rtx result_part
1451
            = extract_bit_field (op0, MIN (BITS_PER_WORD,
1452
                                           bitsize - i * BITS_PER_WORD),
1453
                                 bitnum + bit_offset, 1, target_part, mode,
1454
                                 word_mode);
1455
 
1456
          gcc_assert (target_part);
1457
 
1458
          if (result_part != target_part)
1459
            emit_move_insn (target_part, result_part);
1460
        }
1461
 
1462
      if (unsignedp)
1463
        {
1464
          /* Unless we've filled TARGET, the upper regs in a multi-reg value
1465
             need to be zero'd out.  */
1466
          if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1467
            {
1468
              unsigned int i, total_words;
1469
 
1470
              total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1471
              for (i = nwords; i < total_words; i++)
1472
                emit_move_insn
1473
                  (operand_subword (target,
1474
                                    WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1475
                                    1, VOIDmode),
1476
                   const0_rtx);
1477
            }
1478
          return target;
1479
        }
1480
 
1481
      /* Signed bit field: sign-extend with two arithmetic shifts.  */
1482
      target = expand_shift (LSHIFT_EXPR, mode, target,
1483
                             build_int_cst (NULL_TREE,
1484
                                            GET_MODE_BITSIZE (mode) - bitsize),
1485
                             NULL_RTX, 0);
1486
      return expand_shift (RSHIFT_EXPR, mode, target,
1487
                           build_int_cst (NULL_TREE,
1488
                                          GET_MODE_BITSIZE (mode) - bitsize),
1489
                           NULL_RTX, 0);
1490
    }
1491
 
1492
  /* From here on we know the desired field is smaller than a word.  */
1493
 
1494
  /* Check if there is a correspondingly-sized integer field, so we can
1495
     safely extract it as one size of integer, if necessary; then
1496
     truncate or extend to the size that is wanted; then use SUBREGs or
1497
     convert_to_mode to get one of the modes we really wanted.  */
1498
 
1499
  int_mode = int_mode_for_mode (tmode);
1500
  if (int_mode == BLKmode)
1501
    int_mode = int_mode_for_mode (mode);
1502
  /* Should probably push op0 out to memory and then do a load.  */
1503
  gcc_assert (int_mode != BLKmode);
1504
 
1505
  /* OFFSET is the number of words or bytes (UNIT says which)
1506
     from STR_RTX to the first word or byte containing part of the field.  */
1507
  if (!MEM_P (op0))
1508
    {
1509
      if (offset != 0
1510
          || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1511
        {
1512
          if (!REG_P (op0))
1513
            op0 = copy_to_reg (op0);
1514
          op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1515
                                op0, (offset * UNITS_PER_WORD));
1516
        }
1517
      offset = 0;
1518
    }
1519
 
1520
  /* Now OFFSET is nonzero only for memory operands.  */
1521
  ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1522
  icode = unsignedp ? CODE_FOR_extzv : CODE_FOR_extv;
1523
  if (ext_mode != MAX_MACHINE_MODE
1524
      && bitsize > 0
1525
      && GET_MODE_BITSIZE (ext_mode) >= bitsize
1526
      /* If op0 is a register, we need it in EXT_MODE to make it
1527
         acceptable to the format of ext(z)v.  */
1528
      && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1529
      && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1530
           && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode)))
1531
      && check_predicate_volatile_ok (icode, 1, op0, GET_MODE (op0)))
1532
    {
1533
      unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1534
      rtx bitsize_rtx, bitpos_rtx;
1535
      rtx last = get_last_insn ();
1536
      rtx xop0 = op0;
1537
      rtx xtarget = target;
1538
      rtx xspec_target = target;
1539
      rtx xspec_target_subreg = 0;
1540
      rtx pat;
1541
 
1542
      /* If op0 is a register, we need it in EXT_MODE to make it
1543
         acceptable to the format of ext(z)v.  */
1544
      if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1545
        xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1546
      if (MEM_P (xop0))
1547
        /* Get ref to first byte containing part of the field.  */
1548
        xop0 = adjust_address (xop0, byte_mode, xoffset);
1549
 
1550
      /* On big-endian machines, we count bits from the most significant.
1551
         If the bit field insn does not, we must invert.  */
1552
      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1553
        xbitpos = unit - bitsize - xbitpos;
1554
 
1555
      /* Now convert from counting within UNIT to counting in EXT_MODE.  */
1556
      if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1557
        xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1558
 
1559
      unit = GET_MODE_BITSIZE (ext_mode);
1560
 
1561
      if (xtarget == 0)
1562
        xtarget = xspec_target = gen_reg_rtx (tmode);
1563
 
1564
      if (GET_MODE (xtarget) != ext_mode)
1565
        {
1566
          /* Don't use LHS paradoxical subreg if explicit truncation is needed
1567
             between the mode of the extraction (word_mode) and the target
1568
             mode.  Instead, create a temporary and use convert_move to set
1569
             the target.  */
1570
          if (REG_P (xtarget)
1571
              && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE (xtarget)),
1572
                                        GET_MODE_BITSIZE (ext_mode)))
1573
            {
1574
              xtarget = gen_lowpart (ext_mode, xtarget);
1575
              if (GET_MODE_SIZE (ext_mode)
1576
                  > GET_MODE_SIZE (GET_MODE (xspec_target)))
1577
                xspec_target_subreg = xtarget;
1578
            }
1579
          else
1580
            xtarget = gen_reg_rtx (ext_mode);
1581
        }
1582
 
1583
      /* If this machine's ext(z)v insists on a register target,
1584
         make sure we have one.  */
1585
      if (!insn_data[(int) icode].operand[0].predicate (xtarget, ext_mode))
1586
        xtarget = gen_reg_rtx (ext_mode);
1587
 
1588
      bitsize_rtx = GEN_INT (bitsize);
1589
      bitpos_rtx = GEN_INT (xbitpos);
1590
 
1591
      pat = (unsignedp
1592
             ? gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx)
1593
             : gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx));
1594
      if (pat)
1595
        {
1596
          emit_insn (pat);
1597
          if (xtarget == xspec_target)
1598
            return xtarget;
1599
          if (xtarget == xspec_target_subreg)
1600
            return xspec_target;
1601
          return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1602
        }
1603
      delete_insns_since (last);
1604
    }
1605
 
1606
  /* If OP0 is a memory, try copying it to a register and seeing if a
1607
     cheap register alternative is available.  */
1608
  if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1609
    {
1610
      enum machine_mode bestmode;
1611
 
1612
      /* Get the mode to use for inserting into this field.  If
1613
         OP0 is BLKmode, get the smallest mode consistent with the
1614
         alignment. If OP0 is a non-BLKmode object that is no
1615
         wider than EXT_MODE, use its mode. Otherwise, use the
1616
         smallest mode containing the field.  */
1617
 
1618
      if (GET_MODE (op0) == BLKmode
1619
          || (ext_mode != MAX_MACHINE_MODE
1620
              && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1621
        bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
1622
                                  (ext_mode == MAX_MACHINE_MODE
1623
                                   ? VOIDmode : ext_mode),
1624
                                  MEM_VOLATILE_P (op0));
1625
      else
1626
        bestmode = GET_MODE (op0);
1627
 
1628
      if (bestmode != VOIDmode
1629
          && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1630
               && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1631
        {
1632
          unsigned HOST_WIDE_INT xoffset, xbitpos;
1633
 
1634
          /* Compute the offset as a multiple of this unit,
1635
             counting in bytes.  */
1636
          unit = GET_MODE_BITSIZE (bestmode);
1637
          xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1638
          xbitpos = bitnum % unit;
1639
 
1640
          /* Make sure the register is big enough for the whole field.  */
1641
          if (xoffset * BITS_PER_UNIT + unit
1642
              >= offset * BITS_PER_UNIT + bitsize)
1643
            {
1644
              rtx last, result, xop0;
1645
 
1646
              last = get_last_insn ();
1647
 
1648
              /* Fetch it to a register in that size.  */
1649
              xop0 = adjust_address (op0, bestmode, xoffset);
1650
              xop0 = force_reg (bestmode, xop0);
1651
              result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1652
                                            unsignedp, target,
1653
                                            mode, tmode, false);
1654
              if (result)
1655
                return result;
1656
 
1657
              delete_insns_since (last);
1658
            }
1659
        }
1660
    }
1661
 
1662
  if (!fallback_p)
1663
    return NULL;
1664
 
1665
  target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1666
                                    bitpos, target, unsignedp);
1667
  return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1668
}
1669
 
1670
/* Generate code to extract a byte-field from STR_RTX
1671
   containing BITSIZE bits, starting at BITNUM,
1672
   and put it in TARGET if possible (if TARGET is nonzero).
1673
   Regardless of TARGET, we return the rtx for where the value is placed.
1674
 
1675
   STR_RTX is the structure containing the byte (a REG or MEM).
1676
   UNSIGNEDP is nonzero if this is an unsigned bit field.
1677
   MODE is the natural mode of the field value once extracted.
1678
   TMODE is the mode the caller would like the value to have;
1679
   but the value may be returned with type MODE instead.
1680
 
1681
   If a TARGET is specified and we can store in it at no extra cost,
1682
   we do so, and return TARGET.
1683
   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1684
   if they are equally easy.  */
1685
 
1686
rtx
1687
extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1688
                   unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1689
                   enum machine_mode mode, enum machine_mode tmode)
1690
{
1691
  return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1692
                              target, mode, tmode, true);
1693
}
1694
 
1695
/* Extract a bit field using shifts and boolean operations
1696
   Returns an rtx to represent the value.
1697
   OP0 addresses a register (word) or memory (byte).
1698
   BITPOS says which bit within the word or byte the bit field starts in.
1699
   OFFSET says how many bytes farther the bit field starts;
1700
    it is 0 if OP0 is a register.
1701
   BITSIZE says how many bits long the bit field is.
1702
    (If OP0 is a register, it may be narrower than a full word,
1703
     but BITPOS still counts within a full word,
1704
     which is significant on bigendian machines.)
1705
 
1706
   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1707
   If TARGET is nonzero, attempts to store the value there
1708
   and return TARGET, but this is not guaranteed.
1709
   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1710
 
1711
static rtx
1712
extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1713
                         unsigned HOST_WIDE_INT offset,
1714
                         unsigned HOST_WIDE_INT bitsize,
1715
                         unsigned HOST_WIDE_INT bitpos, rtx target,
1716
                         int unsignedp)
1717
{
1718
  unsigned int total_bits = BITS_PER_WORD;
1719
  enum machine_mode mode;
1720
 
1721
  if (GET_CODE (op0) == SUBREG || REG_P (op0))
1722
    {
1723
      /* Special treatment for a bit field split across two registers.  */
1724
      if (bitsize + bitpos > BITS_PER_WORD)
1725
        return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1726
    }
1727
  else
1728
    {
1729
      /* Get the proper mode to use for this field.  We want a mode that
1730
         includes the entire field.  If such a mode would be larger than
1731
         a word, we won't be doing the extraction the normal way.  */
1732
 
1733
      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1734
                            MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1735
 
1736
      if (mode == VOIDmode)
1737
        /* The only way this should occur is if the field spans word
1738
           boundaries.  */
1739
        return extract_split_bit_field (op0, bitsize,
1740
                                        bitpos + offset * BITS_PER_UNIT,
1741
                                        unsignedp);
1742
 
1743
      total_bits = GET_MODE_BITSIZE (mode);
1744
 
1745
      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1746
         be in the range 0 to total_bits-1, and put any excess bytes in
1747
         OFFSET.  */
1748
      if (bitpos >= total_bits)
1749
        {
1750
          offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1751
          bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1752
                     * BITS_PER_UNIT);
1753
        }
1754
 
1755
      /* Get ref to an aligned byte, halfword, or word containing the field.
1756
         Adjust BITPOS to be position within a word,
1757
         and OFFSET to be the offset of that word.
1758
         Then alter OP0 to refer to that word.  */
1759
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1760
      offset -= (offset % (total_bits / BITS_PER_UNIT));
1761
      op0 = adjust_address (op0, mode, offset);
1762
    }
1763
 
1764
  mode = GET_MODE (op0);
1765
 
1766
  if (BYTES_BIG_ENDIAN)
1767
    /* BITPOS is the distance between our msb and that of OP0.
1768
       Convert it to the distance from the lsb.  */
1769
    bitpos = total_bits - bitsize - bitpos;
1770
 
1771
  /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1772
     We have reduced the big-endian case to the little-endian case.  */
1773
 
1774
  if (unsignedp)
1775
    {
1776
      if (bitpos)
1777
        {
1778
          /* If the field does not already start at the lsb,
1779
             shift it so it does.  */
1780
          tree amount = build_int_cst (NULL_TREE, bitpos);
1781
          /* Maybe propagate the target for the shift.  */
1782
          /* But not if we will return it--could confuse integrate.c.  */
1783
          rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1784
          if (tmode != mode) subtarget = 0;
1785
          op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1786
        }
1787
      /* Convert the value to the desired mode.  */
1788
      if (mode != tmode)
1789
        op0 = convert_to_mode (tmode, op0, 1);
1790
 
1791
      /* Unless the msb of the field used to be the msb when we shifted,
1792
         mask out the upper bits.  */
1793
 
1794
      if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1795
        return expand_binop (GET_MODE (op0), and_optab, op0,
1796
                             mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1797
                             target, 1, OPTAB_LIB_WIDEN);
1798
      return op0;
1799
    }
1800
 
1801
  /* To extract a signed bit-field, first shift its msb to the msb of the word,
1802
     then arithmetic-shift its lsb to the lsb of the word.  */
1803
  op0 = force_reg (mode, op0);
1804
  if (mode != tmode)
1805
    target = 0;
1806
 
1807
  /* Find the narrowest integer mode that contains the field.  */
1808
 
1809
  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1810
       mode = GET_MODE_WIDER_MODE (mode))
1811
    if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1812
      {
1813
        op0 = convert_to_mode (mode, op0, 0);
1814
        break;
1815
      }
1816
 
1817
  if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1818
    {
1819
      tree amount
1820
        = build_int_cst (NULL_TREE,
1821
                         GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1822
      /* Maybe propagate the target for the shift.  */
1823
      rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1824
      op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1825
    }
1826
 
1827
  return expand_shift (RSHIFT_EXPR, mode, op0,
1828
                       build_int_cst (NULL_TREE,
1829
                                      GET_MODE_BITSIZE (mode) - bitsize),
1830
                       target, 0);
1831
}
1832
 
1833
/* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1834
   of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1835
   complement of that if COMPLEMENT.  The mask is truncated if
1836
   necessary to the width of mode MODE.  The mask is zero-extended if
1837
   BITSIZE+BITPOS is too small for MODE.  */
1838
 
1839
static rtx
1840
mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1841
{
1842
  HOST_WIDE_INT masklow, maskhigh;
1843
 
1844
  if (bitsize == 0)
1845
    masklow = 0;
1846
  else if (bitpos < HOST_BITS_PER_WIDE_INT)
1847
    masklow = (HOST_WIDE_INT) -1 << bitpos;
1848
  else
1849
    masklow = 0;
1850
 
1851
  if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1852
    masklow &= ((unsigned HOST_WIDE_INT) -1
1853
                >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1854
 
1855
  if (bitpos <= HOST_BITS_PER_WIDE_INT)
1856
    maskhigh = -1;
1857
  else
1858
    maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1859
 
1860
  if (bitsize == 0)
1861
    maskhigh = 0;
1862
  else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1863
    maskhigh &= ((unsigned HOST_WIDE_INT) -1
1864
                 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1865
  else
1866
    maskhigh = 0;
1867
 
1868
  if (complement)
1869
    {
1870
      maskhigh = ~maskhigh;
1871
      masklow = ~masklow;
1872
    }
1873
 
1874
  return immed_double_const (masklow, maskhigh, mode);
1875
}
1876
 
1877
/* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1878
   VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1879
 
1880
static rtx
1881
lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1882
{
1883
  unsigned HOST_WIDE_INT v = INTVAL (value);
1884
  HOST_WIDE_INT low, high;
1885
 
1886
  if (bitsize < HOST_BITS_PER_WIDE_INT)
1887
    v &= ~((HOST_WIDE_INT) -1 << bitsize);
1888
 
1889
  if (bitpos < HOST_BITS_PER_WIDE_INT)
1890
    {
1891
      low = v << bitpos;
1892
      high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1893
    }
1894
  else
1895
    {
1896
      low = 0;
1897
      high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1898
    }
1899
 
1900
  return immed_double_const (low, high, mode);
1901
}
1902
 
1903
/* Extract a bit field that is split across two words
1904
   and return an RTX for the result.
1905
 
1906
   OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1907
   BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1908
   UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1909
 
1910
static rtx
1911
extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1912
                         unsigned HOST_WIDE_INT bitpos, int unsignedp)
1913
{
1914
  unsigned int unit;
1915
  unsigned int bitsdone = 0;
1916
  rtx result = NULL_RTX;
1917
  int first = 1;
1918
 
1919
  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1920
     much at a time.  */
1921
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1922
    unit = BITS_PER_WORD;
1923
  else
1924
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1925
 
1926
  while (bitsdone < bitsize)
1927
    {
1928
      unsigned HOST_WIDE_INT thissize;
1929
      rtx part, word;
1930
      unsigned HOST_WIDE_INT thispos;
1931
      unsigned HOST_WIDE_INT offset;
1932
 
1933
      offset = (bitpos + bitsdone) / unit;
1934
      thispos = (bitpos + bitsdone) % unit;
1935
 
1936
      /* THISSIZE must not overrun a word boundary.  Otherwise,
1937
         extract_fixed_bit_field will call us again, and we will mutually
1938
         recurse forever.  */
1939
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1940
      thissize = MIN (thissize, unit - thispos);
1941
 
1942
      /* If OP0 is a register, then handle OFFSET here.
1943
 
1944
         When handling multiword bitfields, extract_bit_field may pass
1945
         down a word_mode SUBREG of a larger REG for a bitfield that actually
1946
         crosses a word boundary.  Thus, for a SUBREG, we must find
1947
         the current word starting from the base register.  */
1948
      if (GET_CODE (op0) == SUBREG)
1949
        {
1950
          int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1951
          word = operand_subword_force (SUBREG_REG (op0), word_offset,
1952
                                        GET_MODE (SUBREG_REG (op0)));
1953
          offset = 0;
1954
        }
1955
      else if (REG_P (op0))
1956
        {
1957
          word = operand_subword_force (op0, offset, GET_MODE (op0));
1958
          offset = 0;
1959
        }
1960
      else
1961
        word = op0;
1962
 
1963
      /* Extract the parts in bit-counting order,
1964
         whose meaning is determined by BYTES_PER_UNIT.
1965
         OFFSET is in UNITs, and UNIT is in bits.
1966
         extract_fixed_bit_field wants offset in bytes.  */
1967
      part = extract_fixed_bit_field (word_mode, word,
1968
                                      offset * unit / BITS_PER_UNIT,
1969
                                      thissize, thispos, 0, 1);
1970
      bitsdone += thissize;
1971
 
1972
      /* Shift this part into place for the result.  */
1973
      if (BYTES_BIG_ENDIAN)
1974
        {
1975
          if (bitsize != bitsdone)
1976
            part = expand_shift (LSHIFT_EXPR, word_mode, part,
1977
                                 build_int_cst (NULL_TREE, bitsize - bitsdone),
1978
                                 0, 1);
1979
        }
1980
      else
1981
        {
1982
          if (bitsdone != thissize)
1983
            part = expand_shift (LSHIFT_EXPR, word_mode, part,
1984
                                 build_int_cst (NULL_TREE,
1985
                                                bitsdone - thissize), 0, 1);
1986
        }
1987
 
1988
      if (first)
1989
        result = part;
1990
      else
1991
        /* Combine the parts with bitwise or.  This works
1992
           because we extracted each part as an unsigned bit field.  */
1993
        result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1994
                               OPTAB_LIB_WIDEN);
1995
 
1996
      first = 0;
1997
    }
1998
 
1999
  /* Unsigned bit field: we are done.  */
2000
  if (unsignedp)
2001
    return result;
2002
  /* Signed bit field: sign-extend with two arithmetic shifts.  */
2003
  result = expand_shift (LSHIFT_EXPR, word_mode, result,
2004
                         build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2005
                         NULL_RTX, 0);
2006
  return expand_shift (RSHIFT_EXPR, word_mode, result,
2007
                       build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2008
                       NULL_RTX, 0);
2009
}
2010
 
2011
/* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2012
   the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2013
   MODE, fill the upper bits with zeros.  Fail if the layout of either
2014
   mode is unknown (as for CC modes) or if the extraction would involve
2015
   unprofitable mode punning.  Return the value on success, otherwise
2016
   return null.
2017
 
2018
   This is different from gen_lowpart* in these respects:
2019
 
2020
     - the returned value must always be considered an rvalue
2021
 
2022
     - when MODE is wider than SRC_MODE, the extraction involves
2023
       a zero extension
2024
 
2025
     - when MODE is smaller than SRC_MODE, the extraction involves
2026
       a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2027
 
2028
   In other words, this routine performs a computation, whereas the
2029
   gen_lowpart* routines are conceptually lvalue or rvalue subreg
2030
   operations.  */
2031
 
2032
rtx
2033
extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2034
{
2035
  enum machine_mode int_mode, src_int_mode;
2036
 
2037
  if (mode == src_mode)
2038
    return src;
2039
 
2040
  if (CONSTANT_P (src))
2041
    {
2042
      /* simplify_gen_subreg can't be used here, as if simplify_subreg
2043
         fails, it will happily create (subreg (symbol_ref)) or similar
2044
         invalid SUBREGs.  */
2045
      unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2046
      rtx ret = simplify_subreg (mode, src, src_mode, byte);
2047
      if (ret)
2048
        return ret;
2049
 
2050
      if (GET_MODE (src) == VOIDmode
2051
          || !validate_subreg (mode, src_mode, src, byte))
2052
        return NULL_RTX;
2053
 
2054
      src = force_reg (GET_MODE (src), src);
2055
      return gen_rtx_SUBREG (mode, src, byte);
2056
    }
2057
 
2058
  if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2059
    return NULL_RTX;
2060
 
2061
  if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2062
      && MODES_TIEABLE_P (mode, src_mode))
2063
    {
2064
      rtx x = gen_lowpart_common (mode, src);
2065
      if (x)
2066
        return x;
2067
    }
2068
 
2069
  src_int_mode = int_mode_for_mode (src_mode);
2070
  int_mode = int_mode_for_mode (mode);
2071
  if (src_int_mode == BLKmode || int_mode == BLKmode)
2072
    return NULL_RTX;
2073
 
2074
  if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2075
    return NULL_RTX;
2076
  if (!MODES_TIEABLE_P (int_mode, mode))
2077
    return NULL_RTX;
2078
 
2079
  src = gen_lowpart (src_int_mode, src);
2080
  src = convert_modes (int_mode, src_int_mode, src, true);
2081
  src = gen_lowpart (mode, src);
2082
  return src;
2083
}
2084
 
2085
/* Add INC into TARGET.  */
2086
 
2087
void
2088
expand_inc (rtx target, rtx inc)
2089
{
2090
  rtx value = expand_binop (GET_MODE (target), add_optab,
2091
                            target, inc,
2092
                            target, 0, OPTAB_LIB_WIDEN);
2093
  if (value != target)
2094
    emit_move_insn (target, value);
2095
}
2096
 
2097
/* Subtract DEC from TARGET.  */
2098
 
2099
void
2100
expand_dec (rtx target, rtx dec)
2101
{
2102
  rtx value = expand_binop (GET_MODE (target), sub_optab,
2103
                            target, dec,
2104
                            target, 0, OPTAB_LIB_WIDEN);
2105
  if (value != target)
2106
    emit_move_insn (target, value);
2107
}
2108
 
2109
/* Output a shift instruction for expression code CODE,
2110
   with SHIFTED being the rtx for the value to shift,
2111
   and AMOUNT the tree for the amount to shift by.
2112
   Store the result in the rtx TARGET, if that is convenient.
2113
   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2114
   Return the rtx for where the value is.  */
2115
 
2116
rtx
2117
expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2118
              tree amount, rtx target, int unsignedp)
2119
{
2120
  rtx op1, temp = 0;
2121
  int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2122
  int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2123
  optab lshift_optab = ashl_optab;
2124
  optab rshift_arith_optab = ashr_optab;
2125
  optab rshift_uns_optab = lshr_optab;
2126
  optab lrotate_optab = rotl_optab;
2127
  optab rrotate_optab = rotr_optab;
2128
  enum machine_mode op1_mode;
2129
  int attempt;
2130
  bool speed = optimize_insn_for_speed_p ();
2131
 
2132
  op1 = expand_normal (amount);
2133
  op1_mode = GET_MODE (op1);
2134
 
2135
  /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2136
     shift amount is a vector, use the vector/vector shift patterns.  */
2137
  if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2138
    {
2139
      lshift_optab = vashl_optab;
2140
      rshift_arith_optab = vashr_optab;
2141
      rshift_uns_optab = vlshr_optab;
2142
      lrotate_optab = vrotl_optab;
2143
      rrotate_optab = vrotr_optab;
2144
    }
2145
 
2146
  /* Previously detected shift-counts computed by NEGATE_EXPR
2147
     and shifted in the other direction; but that does not work
2148
     on all machines.  */
2149
 
2150
  if (SHIFT_COUNT_TRUNCATED)
2151
    {
2152
      if (CONST_INT_P (op1)
2153
          && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2154
              (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2155
        op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2156
                       % GET_MODE_BITSIZE (mode));
2157
      else if (GET_CODE (op1) == SUBREG
2158
               && subreg_lowpart_p (op1)
2159
               && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2160
        op1 = SUBREG_REG (op1);
2161
    }
2162
 
2163
  if (op1 == const0_rtx)
2164
    return shifted;
2165
 
2166
  /* Check whether its cheaper to implement a left shift by a constant
2167
     bit count by a sequence of additions.  */
2168
  if (code == LSHIFT_EXPR
2169
      && CONST_INT_P (op1)
2170
      && INTVAL (op1) > 0
2171
      && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2172
      && INTVAL (op1) < MAX_BITS_PER_WORD
2173
      && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2174
      && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2175
    {
2176
      int i;
2177
      for (i = 0; i < INTVAL (op1); i++)
2178
        {
2179
          temp = force_reg (mode, shifted);
2180
          shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2181
                                  unsignedp, OPTAB_LIB_WIDEN);
2182
        }
2183
      return shifted;
2184
    }
2185
 
2186
  for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2187
    {
2188
      enum optab_methods methods;
2189
 
2190
      if (attempt == 0)
2191
        methods = OPTAB_DIRECT;
2192
      else if (attempt == 1)
2193
        methods = OPTAB_WIDEN;
2194
      else
2195
        methods = OPTAB_LIB_WIDEN;
2196
 
2197
      if (rotate)
2198
        {
2199
          /* Widening does not work for rotation.  */
2200
          if (methods == OPTAB_WIDEN)
2201
            continue;
2202
          else if (methods == OPTAB_LIB_WIDEN)
2203
            {
2204
              /* If we have been unable to open-code this by a rotation,
2205
                 do it as the IOR of two shifts.  I.e., to rotate A
2206
                 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2207
                 where C is the bitsize of A.
2208
 
2209
                 It is theoretically possible that the target machine might
2210
                 not be able to perform either shift and hence we would
2211
                 be making two libcalls rather than just the one for the
2212
                 shift (similarly if IOR could not be done).  We will allow
2213
                 this extremely unlikely lossage to avoid complicating the
2214
                 code below.  */
2215
 
2216
              rtx subtarget = target == shifted ? 0 : target;
2217
              tree new_amount, other_amount;
2218
              rtx temp1;
2219
              tree type = TREE_TYPE (amount);
2220
              if (GET_MODE (op1) != TYPE_MODE (type)
2221
                  && GET_MODE (op1) != VOIDmode)
2222
                op1 = convert_to_mode (TYPE_MODE (type), op1, 1);
2223
              new_amount = make_tree (type, op1);
2224
              other_amount
2225
                = fold_build2 (MINUS_EXPR, type,
2226
                               build_int_cst (type, GET_MODE_BITSIZE (mode)),
2227
                               new_amount);
2228
 
2229
              shifted = force_reg (mode, shifted);
2230
 
2231
              temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2232
                                   mode, shifted, new_amount, 0, 1);
2233
              temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2234
                                    mode, shifted, other_amount, subtarget, 1);
2235
              return expand_binop (mode, ior_optab, temp, temp1, target,
2236
                                   unsignedp, methods);
2237
            }
2238
 
2239
          temp = expand_binop (mode,
2240
                               left ? lrotate_optab : rrotate_optab,
2241
                               shifted, op1, target, unsignedp, methods);
2242
        }
2243
      else if (unsignedp)
2244
        temp = expand_binop (mode,
2245
                             left ? lshift_optab : rshift_uns_optab,
2246
                             shifted, op1, target, unsignedp, methods);
2247
 
2248
      /* Do arithmetic shifts.
2249
         Also, if we are going to widen the operand, we can just as well
2250
         use an arithmetic right-shift instead of a logical one.  */
2251
      if (temp == 0 && ! rotate
2252
          && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2253
        {
2254
          enum optab_methods methods1 = methods;
2255
 
2256
          /* If trying to widen a log shift to an arithmetic shift,
2257
             don't accept an arithmetic shift of the same size.  */
2258
          if (unsignedp)
2259
            methods1 = OPTAB_MUST_WIDEN;
2260
 
2261
          /* Arithmetic shift */
2262
 
2263
          temp = expand_binop (mode,
2264
                               left ? lshift_optab : rshift_arith_optab,
2265
                               shifted, op1, target, unsignedp, methods1);
2266
        }
2267
 
2268
      /* We used to try extzv here for logical right shifts, but that was
2269
         only useful for one machine, the VAX, and caused poor code
2270
         generation there for lshrdi3, so the code was deleted and a
2271
         define_expand for lshrsi3 was added to vax.md.  */
2272
    }
2273
 
2274
  gcc_assert (temp);
2275
  return temp;
2276
}
2277
 
2278
enum alg_code {
2279
  alg_unknown,
2280
  alg_zero,
2281
  alg_m, alg_shift,
2282
  alg_add_t_m2,
2283
  alg_sub_t_m2,
2284
  alg_add_factor,
2285
  alg_sub_factor,
2286
  alg_add_t2_m,
2287
  alg_sub_t2_m,
2288
  alg_impossible
2289
};
2290
 
2291
/* This structure holds the "cost" of a multiply sequence.  The
2292
   "cost" field holds the total rtx_cost of every operator in the
2293
   synthetic multiplication sequence, hence cost(a op b) is defined
2294
   as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2295
   The "latency" field holds the minimum possible latency of the
2296
   synthetic multiply, on a hypothetical infinitely parallel CPU.
2297
   This is the critical path, or the maximum height, of the expression
2298
   tree which is the sum of rtx_costs on the most expensive path from
2299
   any leaf to the root.  Hence latency(a op b) is defined as zero for
2300
   leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise.  */
2301
 
2302
struct mult_cost {
2303
  short cost;     /* Total rtx_cost of the multiplication sequence.  */
2304
  short latency;  /* The latency of the multiplication sequence.  */
2305
};
2306
 
2307
/* This macro is used to compare a pointer to a mult_cost against an
2308
   single integer "rtx_cost" value.  This is equivalent to the macro
2309
   CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}.  */
2310
#define MULT_COST_LESS(X,Y) ((X)->cost < (Y)    \
2311
                             || ((X)->cost == (Y) && (X)->latency < (Y)))
2312
 
2313
/* This macro is used to compare two pointers to mult_costs against
2314
   each other.  The macro returns true if X is cheaper than Y.
2315
   Currently, the cheaper of two mult_costs is the one with the
2316
   lower "cost".  If "cost"s are tied, the lower latency is cheaper.  */
2317
#define CHEAPER_MULT_COST(X,Y)  ((X)->cost < (Y)->cost          \
2318
                                 || ((X)->cost == (Y)->cost     \
2319
                                     && (X)->latency < (Y)->latency))
2320
 
2321
/* This structure records a sequence of operations.
2322
   `ops' is the number of operations recorded.
2323
   `cost' is their total cost.
2324
   The operations are stored in `op' and the corresponding
2325
   logarithms of the integer coefficients in `log'.
2326
 
2327
   These are the operations:
2328
   alg_zero             total := 0;
2329
   alg_m                total := multiplicand;
2330
   alg_shift            total := total * coeff
2331
   alg_add_t_m2         total := total + multiplicand * coeff;
2332
   alg_sub_t_m2         total := total - multiplicand * coeff;
2333
   alg_add_factor       total := total * coeff + total;
2334
   alg_sub_factor       total := total * coeff - total;
2335
   alg_add_t2_m         total := total * coeff + multiplicand;
2336
   alg_sub_t2_m         total := total * coeff - multiplicand;
2337
 
2338
   The first operand must be either alg_zero or alg_m.  */
2339
 
2340
struct algorithm
2341
{
2342
  struct mult_cost cost;
2343
  short ops;
2344
  /* The size of the OP and LOG fields are not directly related to the
2345
     word size, but the worst-case algorithms will be if we have few
2346
     consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2347
     In that case we will generate shift-by-2, add, shift-by-2, add,...,
2348
     in total wordsize operations.  */
2349
  enum alg_code op[MAX_BITS_PER_WORD];
2350
  char log[MAX_BITS_PER_WORD];
2351
};
2352
 
2353
/* The entry for our multiplication cache/hash table.  */
2354
struct alg_hash_entry {
2355
  /* The number we are multiplying by.  */
2356
  unsigned HOST_WIDE_INT t;
2357
 
2358
  /* The mode in which we are multiplying something by T.  */
2359
  enum machine_mode mode;
2360
 
2361
  /* The best multiplication algorithm for t.  */
2362
  enum alg_code alg;
2363
 
2364
  /* The cost of multiplication if ALG_CODE is not alg_impossible.
2365
     Otherwise, the cost within which multiplication by T is
2366
     impossible.  */
2367
  struct mult_cost cost;
2368
 
2369
  /* OPtimized for speed? */
2370
  bool speed;
2371
};
2372
 
2373
/* The number of cache/hash entries.  */
2374
#if HOST_BITS_PER_WIDE_INT == 64
2375
#define NUM_ALG_HASH_ENTRIES 1031
2376
#else
2377
#define NUM_ALG_HASH_ENTRIES 307
2378
#endif
2379
 
2380
/* Each entry of ALG_HASH caches alg_code for some integer.  This is
2381
   actually a hash table.  If we have a collision, that the older
2382
   entry is kicked out.  */
2383
static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2384
 
2385
/* Indicates the type of fixup needed after a constant multiplication.
2386
   BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2387
   the result should be negated, and ADD_VARIANT means that the
2388
   multiplicand should be added to the result.  */
2389
enum mult_variant {basic_variant, negate_variant, add_variant};
2390
 
2391
static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2392
                        const struct mult_cost *, enum machine_mode mode);
2393
static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2394
                                 struct algorithm *, enum mult_variant *, int);
2395
static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2396
                              const struct algorithm *, enum mult_variant);
2397
static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2398
                                                 int, rtx *, int *, int *);
2399
static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2400
static rtx extract_high_half (enum machine_mode, rtx);
2401
static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2402
static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2403
                                       int, int);
2404
/* Compute and return the best algorithm for multiplying by T.
2405
   The algorithm must cost less than cost_limit
2406
   If retval.cost >= COST_LIMIT, no algorithm was found and all
2407
   other field of the returned struct are undefined.
2408
   MODE is the machine mode of the multiplication.  */
2409
 
2410
static void
2411
synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2412
            const struct mult_cost *cost_limit, enum machine_mode mode)
2413
{
2414
  int m;
2415
  struct algorithm *alg_in, *best_alg;
2416
  struct mult_cost best_cost;
2417
  struct mult_cost new_limit;
2418
  int op_cost, op_latency;
2419
  unsigned HOST_WIDE_INT orig_t = t;
2420
  unsigned HOST_WIDE_INT q;
2421
  int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2422
  int hash_index;
2423
  bool cache_hit = false;
2424
  enum alg_code cache_alg = alg_zero;
2425
  bool speed = optimize_insn_for_speed_p ();
2426
 
2427
  /* Indicate that no algorithm is yet found.  If no algorithm
2428
     is found, this value will be returned and indicate failure.  */
2429
  alg_out->cost.cost = cost_limit->cost + 1;
2430
  alg_out->cost.latency = cost_limit->latency + 1;
2431
 
2432
  if (cost_limit->cost < 0
2433
      || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2434
    return;
2435
 
2436
  /* Restrict the bits of "t" to the multiplication's mode.  */
2437
  t &= GET_MODE_MASK (mode);
2438
 
2439
  /* t == 1 can be done in zero cost.  */
2440
  if (t == 1)
2441
    {
2442
      alg_out->ops = 1;
2443
      alg_out->cost.cost = 0;
2444
      alg_out->cost.latency = 0;
2445
      alg_out->op[0] = alg_m;
2446
      return;
2447
    }
2448
 
2449
  /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2450
     fail now.  */
2451
  if (t == 0)
2452
    {
2453
      if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2454
        return;
2455
      else
2456
        {
2457
          alg_out->ops = 1;
2458
          alg_out->cost.cost = zero_cost[speed];
2459
          alg_out->cost.latency = zero_cost[speed];
2460
          alg_out->op[0] = alg_zero;
2461
          return;
2462
        }
2463
    }
2464
 
2465
  /* We'll be needing a couple extra algorithm structures now.  */
2466
 
2467
  alg_in = XALLOCA (struct algorithm);
2468
  best_alg = XALLOCA (struct algorithm);
2469
  best_cost = *cost_limit;
2470
 
2471
  /* Compute the hash index.  */
2472
  hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2473
 
2474
  /* See if we already know what to do for T.  */
2475
  if (alg_hash[hash_index].t == t
2476
      && alg_hash[hash_index].mode == mode
2477
      && alg_hash[hash_index].mode == mode
2478
      && alg_hash[hash_index].speed == speed
2479
      && alg_hash[hash_index].alg != alg_unknown)
2480
    {
2481
      cache_alg = alg_hash[hash_index].alg;
2482
 
2483
      if (cache_alg == alg_impossible)
2484
        {
2485
          /* The cache tells us that it's impossible to synthesize
2486
             multiplication by T within alg_hash[hash_index].cost.  */
2487
          if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2488
            /* COST_LIMIT is at least as restrictive as the one
2489
               recorded in the hash table, in which case we have no
2490
               hope of synthesizing a multiplication.  Just
2491
               return.  */
2492
            return;
2493
 
2494
          /* If we get here, COST_LIMIT is less restrictive than the
2495
             one recorded in the hash table, so we may be able to
2496
             synthesize a multiplication.  Proceed as if we didn't
2497
             have the cache entry.  */
2498
        }
2499
      else
2500
        {
2501
          if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2502
            /* The cached algorithm shows that this multiplication
2503
               requires more cost than COST_LIMIT.  Just return.  This
2504
               way, we don't clobber this cache entry with
2505
               alg_impossible but retain useful information.  */
2506
            return;
2507
 
2508
          cache_hit = true;
2509
 
2510
          switch (cache_alg)
2511
            {
2512
            case alg_shift:
2513
              goto do_alg_shift;
2514
 
2515
            case alg_add_t_m2:
2516
            case alg_sub_t_m2:
2517
              goto do_alg_addsub_t_m2;
2518
 
2519
            case alg_add_factor:
2520
            case alg_sub_factor:
2521
              goto do_alg_addsub_factor;
2522
 
2523
            case alg_add_t2_m:
2524
              goto do_alg_add_t2_m;
2525
 
2526
            case alg_sub_t2_m:
2527
              goto do_alg_sub_t2_m;
2528
 
2529
            default:
2530
              gcc_unreachable ();
2531
            }
2532
        }
2533
    }
2534
 
2535
  /* If we have a group of zero bits at the low-order part of T, try
2536
     multiplying by the remaining bits and then doing a shift.  */
2537
 
2538
  if ((t & 1) == 0)
2539
    {
2540
    do_alg_shift:
2541
      m = floor_log2 (t & -t);  /* m = number of low zero bits */
2542
      if (m < maxm)
2543
        {
2544
          q = t >> m;
2545
          /* The function expand_shift will choose between a shift and
2546
             a sequence of additions, so the observed cost is given as
2547
             MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]).  */
2548
          op_cost = m * add_cost[speed][mode];
2549
          if (shift_cost[speed][mode][m] < op_cost)
2550
            op_cost = shift_cost[speed][mode][m];
2551
          new_limit.cost = best_cost.cost - op_cost;
2552
          new_limit.latency = best_cost.latency - op_cost;
2553
          synth_mult (alg_in, q, &new_limit, mode);
2554
 
2555
          alg_in->cost.cost += op_cost;
2556
          alg_in->cost.latency += op_cost;
2557
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2558
            {
2559
              struct algorithm *x;
2560
              best_cost = alg_in->cost;
2561
              x = alg_in, alg_in = best_alg, best_alg = x;
2562
              best_alg->log[best_alg->ops] = m;
2563
              best_alg->op[best_alg->ops] = alg_shift;
2564
            }
2565
 
2566
          /* See if treating ORIG_T as a signed number yields a better
2567
             sequence.  Try this sequence only for a negative ORIG_T
2568
             as it would be useless for a non-negative ORIG_T.  */
2569
          if ((HOST_WIDE_INT) orig_t < 0)
2570
            {
2571
              /* Shift ORIG_T as follows because a right shift of a
2572
                 negative-valued signed type is implementation
2573
                 defined.  */
2574
              q = ~(~orig_t >> m);
2575
              /* The function expand_shift will choose between a shift
2576
                 and a sequence of additions, so the observed cost is
2577
                 given as MIN (m * add_cost[speed][mode],
2578
                 shift_cost[speed][mode][m]).  */
2579
              op_cost = m * add_cost[speed][mode];
2580
              if (shift_cost[speed][mode][m] < op_cost)
2581
                op_cost = shift_cost[speed][mode][m];
2582
              new_limit.cost = best_cost.cost - op_cost;
2583
              new_limit.latency = best_cost.latency - op_cost;
2584
              synth_mult (alg_in, q, &new_limit, mode);
2585
 
2586
              alg_in->cost.cost += op_cost;
2587
              alg_in->cost.latency += op_cost;
2588
              if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2589
                {
2590
                  struct algorithm *x;
2591
                  best_cost = alg_in->cost;
2592
                  x = alg_in, alg_in = best_alg, best_alg = x;
2593
                  best_alg->log[best_alg->ops] = m;
2594
                  best_alg->op[best_alg->ops] = alg_shift;
2595
                }
2596
            }
2597
        }
2598
      if (cache_hit)
2599
        goto done;
2600
    }
2601
 
2602
  /* If we have an odd number, add or subtract one.  */
2603
  if ((t & 1) != 0)
2604
    {
2605
      unsigned HOST_WIDE_INT w;
2606
 
2607
    do_alg_addsub_t_m2:
2608
      for (w = 1; (w & t) != 0; w <<= 1)
2609
        ;
2610
      /* If T was -1, then W will be zero after the loop.  This is another
2611
         case where T ends with ...111.  Handling this with (T + 1) and
2612
         subtract 1 produces slightly better code and results in algorithm
2613
         selection much faster than treating it like the ...0111 case
2614
         below.  */
2615
      if (w == 0
2616
          || (w > 2
2617
              /* Reject the case where t is 3.
2618
                 Thus we prefer addition in that case.  */
2619
              && t != 3))
2620
        {
2621
          /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2622
 
2623
          op_cost = add_cost[speed][mode];
2624
          new_limit.cost = best_cost.cost - op_cost;
2625
          new_limit.latency = best_cost.latency - op_cost;
2626
          synth_mult (alg_in, t + 1, &new_limit, mode);
2627
 
2628
          alg_in->cost.cost += op_cost;
2629
          alg_in->cost.latency += op_cost;
2630
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2631
            {
2632
              struct algorithm *x;
2633
              best_cost = alg_in->cost;
2634
              x = alg_in, alg_in = best_alg, best_alg = x;
2635
              best_alg->log[best_alg->ops] = 0;
2636
              best_alg->op[best_alg->ops] = alg_sub_t_m2;
2637
            }
2638
        }
2639
      else
2640
        {
2641
          /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2642
 
2643
          op_cost = add_cost[speed][mode];
2644
          new_limit.cost = best_cost.cost - op_cost;
2645
          new_limit.latency = best_cost.latency - op_cost;
2646
          synth_mult (alg_in, t - 1, &new_limit, mode);
2647
 
2648
          alg_in->cost.cost += op_cost;
2649
          alg_in->cost.latency += op_cost;
2650
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2651
            {
2652
              struct algorithm *x;
2653
              best_cost = alg_in->cost;
2654
              x = alg_in, alg_in = best_alg, best_alg = x;
2655
              best_alg->log[best_alg->ops] = 0;
2656
              best_alg->op[best_alg->ops] = alg_add_t_m2;
2657
            }
2658
        }
2659
 
2660
      /* We may be able to calculate a * -7, a * -15, a * -31, etc
2661
         quickly with a - a * n for some appropriate constant n.  */
2662
      m = exact_log2 (-orig_t + 1);
2663
      if (m >= 0 && m < maxm)
2664
        {
2665
          op_cost = shiftsub1_cost[speed][mode][m];
2666
          new_limit.cost = best_cost.cost - op_cost;
2667
          new_limit.latency = best_cost.latency - op_cost;
2668
          synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
2669
 
2670
          alg_in->cost.cost += op_cost;
2671
          alg_in->cost.latency += op_cost;
2672
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2673
            {
2674
              struct algorithm *x;
2675
              best_cost = alg_in->cost;
2676
              x = alg_in, alg_in = best_alg, best_alg = x;
2677
              best_alg->log[best_alg->ops] = m;
2678
              best_alg->op[best_alg->ops] = alg_sub_t_m2;
2679
            }
2680
        }
2681
 
2682
      if (cache_hit)
2683
        goto done;
2684
    }
2685
 
2686
  /* Look for factors of t of the form
2687
     t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2688
     If we find such a factor, we can multiply by t using an algorithm that
2689
     multiplies by q, shift the result by m and add/subtract it to itself.
2690
 
2691
     We search for large factors first and loop down, even if large factors
2692
     are less probable than small; if we find a large factor we will find a
2693
     good sequence quickly, and therefore be able to prune (by decreasing
2694
     COST_LIMIT) the search.  */
2695
 
2696
 do_alg_addsub_factor:
2697
  for (m = floor_log2 (t - 1); m >= 2; m--)
2698
    {
2699
      unsigned HOST_WIDE_INT d;
2700
 
2701
      d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2702
      if (t % d == 0 && t > d && m < maxm
2703
          && (!cache_hit || cache_alg == alg_add_factor))
2704
        {
2705
          /* If the target has a cheap shift-and-add instruction use
2706
             that in preference to a shift insn followed by an add insn.
2707
             Assume that the shift-and-add is "atomic" with a latency
2708
             equal to its cost, otherwise assume that on superscalar
2709
             hardware the shift may be executed concurrently with the
2710
             earlier steps in the algorithm.  */
2711
          op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2712
          if (shiftadd_cost[speed][mode][m] < op_cost)
2713
            {
2714
              op_cost = shiftadd_cost[speed][mode][m];
2715
              op_latency = op_cost;
2716
            }
2717
          else
2718
            op_latency = add_cost[speed][mode];
2719
 
2720
          new_limit.cost = best_cost.cost - op_cost;
2721
          new_limit.latency = best_cost.latency - op_latency;
2722
          synth_mult (alg_in, t / d, &new_limit, mode);
2723
 
2724
          alg_in->cost.cost += op_cost;
2725
          alg_in->cost.latency += op_latency;
2726
          if (alg_in->cost.latency < op_cost)
2727
            alg_in->cost.latency = op_cost;
2728
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2729
            {
2730
              struct algorithm *x;
2731
              best_cost = alg_in->cost;
2732
              x = alg_in, alg_in = best_alg, best_alg = x;
2733
              best_alg->log[best_alg->ops] = m;
2734
              best_alg->op[best_alg->ops] = alg_add_factor;
2735
            }
2736
          /* Other factors will have been taken care of in the recursion.  */
2737
          break;
2738
        }
2739
 
2740
      d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2741
      if (t % d == 0 && t > d && m < maxm
2742
          && (!cache_hit || cache_alg == alg_sub_factor))
2743
        {
2744
          /* If the target has a cheap shift-and-subtract insn use
2745
             that in preference to a shift insn followed by a sub insn.
2746
             Assume that the shift-and-sub is "atomic" with a latency
2747
             equal to it's cost, otherwise assume that on superscalar
2748
             hardware the shift may be executed concurrently with the
2749
             earlier steps in the algorithm.  */
2750
          op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2751
          if (shiftsub0_cost[speed][mode][m] < op_cost)
2752
            {
2753
              op_cost = shiftsub0_cost[speed][mode][m];
2754
              op_latency = op_cost;
2755
            }
2756
          else
2757
            op_latency = add_cost[speed][mode];
2758
 
2759
          new_limit.cost = best_cost.cost - op_cost;
2760
          new_limit.latency = best_cost.latency - op_latency;
2761
          synth_mult (alg_in, t / d, &new_limit, mode);
2762
 
2763
          alg_in->cost.cost += op_cost;
2764
          alg_in->cost.latency += op_latency;
2765
          if (alg_in->cost.latency < op_cost)
2766
            alg_in->cost.latency = op_cost;
2767
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2768
            {
2769
              struct algorithm *x;
2770
              best_cost = alg_in->cost;
2771
              x = alg_in, alg_in = best_alg, best_alg = x;
2772
              best_alg->log[best_alg->ops] = m;
2773
              best_alg->op[best_alg->ops] = alg_sub_factor;
2774
            }
2775
          break;
2776
        }
2777
    }
2778
  if (cache_hit)
2779
    goto done;
2780
 
2781
  /* Try shift-and-add (load effective address) instructions,
2782
     i.e. do a*3, a*5, a*9.  */
2783
  if ((t & 1) != 0)
2784
    {
2785
    do_alg_add_t2_m:
2786
      q = t - 1;
2787
      q = q & -q;
2788
      m = exact_log2 (q);
2789
      if (m >= 0 && m < maxm)
2790
        {
2791
          op_cost = shiftadd_cost[speed][mode][m];
2792
          new_limit.cost = best_cost.cost - op_cost;
2793
          new_limit.latency = best_cost.latency - op_cost;
2794
          synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2795
 
2796
          alg_in->cost.cost += op_cost;
2797
          alg_in->cost.latency += op_cost;
2798
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2799
            {
2800
              struct algorithm *x;
2801
              best_cost = alg_in->cost;
2802
              x = alg_in, alg_in = best_alg, best_alg = x;
2803
              best_alg->log[best_alg->ops] = m;
2804
              best_alg->op[best_alg->ops] = alg_add_t2_m;
2805
            }
2806
        }
2807
      if (cache_hit)
2808
        goto done;
2809
 
2810
    do_alg_sub_t2_m:
2811
      q = t + 1;
2812
      q = q & -q;
2813
      m = exact_log2 (q);
2814
      if (m >= 0 && m < maxm)
2815
        {
2816
          op_cost = shiftsub0_cost[speed][mode][m];
2817
          new_limit.cost = best_cost.cost - op_cost;
2818
          new_limit.latency = best_cost.latency - op_cost;
2819
          synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2820
 
2821
          alg_in->cost.cost += op_cost;
2822
          alg_in->cost.latency += op_cost;
2823
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2824
            {
2825
              struct algorithm *x;
2826
              best_cost = alg_in->cost;
2827
              x = alg_in, alg_in = best_alg, best_alg = x;
2828
              best_alg->log[best_alg->ops] = m;
2829
              best_alg->op[best_alg->ops] = alg_sub_t2_m;
2830
            }
2831
        }
2832
      if (cache_hit)
2833
        goto done;
2834
    }
2835
 
2836
 done:
2837
  /* If best_cost has not decreased, we have not found any algorithm.  */
2838
  if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2839
    {
2840
      /* We failed to find an algorithm.  Record alg_impossible for
2841
         this case (that is, <T, MODE, COST_LIMIT>) so that next time
2842
         we are asked to find an algorithm for T within the same or
2843
         lower COST_LIMIT, we can immediately return to the
2844
         caller.  */
2845
      alg_hash[hash_index].t = t;
2846
      alg_hash[hash_index].mode = mode;
2847
      alg_hash[hash_index].speed = speed;
2848
      alg_hash[hash_index].alg = alg_impossible;
2849
      alg_hash[hash_index].cost = *cost_limit;
2850
      return;
2851
    }
2852
 
2853
  /* Cache the result.  */
2854
  if (!cache_hit)
2855
    {
2856
      alg_hash[hash_index].t = t;
2857
      alg_hash[hash_index].mode = mode;
2858
      alg_hash[hash_index].speed = speed;
2859
      alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2860
      alg_hash[hash_index].cost.cost = best_cost.cost;
2861
      alg_hash[hash_index].cost.latency = best_cost.latency;
2862
    }
2863
 
2864
  /* If we are getting a too long sequence for `struct algorithm'
2865
     to record, make this search fail.  */
2866
  if (best_alg->ops == MAX_BITS_PER_WORD)
2867
    return;
2868
 
2869
  /* Copy the algorithm from temporary space to the space at alg_out.
2870
     We avoid using structure assignment because the majority of
2871
     best_alg is normally undefined, and this is a critical function.  */
2872
  alg_out->ops = best_alg->ops + 1;
2873
  alg_out->cost = best_cost;
2874
  memcpy (alg_out->op, best_alg->op,
2875
          alg_out->ops * sizeof *alg_out->op);
2876
  memcpy (alg_out->log, best_alg->log,
2877
          alg_out->ops * sizeof *alg_out->log);
2878
}
2879
 
2880
/* Find the cheapest way of multiplying a value of mode MODE by VAL.
2881
   Try three variations:
2882
 
2883
       - a shift/add sequence based on VAL itself
2884
       - a shift/add sequence based on -VAL, followed by a negation
2885
       - a shift/add sequence based on VAL - 1, followed by an addition.
2886
 
2887
   Return true if the cheapest of these cost less than MULT_COST,
2888
   describing the algorithm in *ALG and final fixup in *VARIANT.  */
2889
 
2890
static bool
2891
choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2892
                     struct algorithm *alg, enum mult_variant *variant,
2893
                     int mult_cost)
2894
{
2895
  struct algorithm alg2;
2896
  struct mult_cost limit;
2897
  int op_cost;
2898
  bool speed = optimize_insn_for_speed_p ();
2899
 
2900
  /* Fail quickly for impossible bounds.  */
2901
  if (mult_cost < 0)
2902
    return false;
2903
 
2904
  /* Ensure that mult_cost provides a reasonable upper bound.
2905
     Any constant multiplication can be performed with less
2906
     than 2 * bits additions.  */
2907
  op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2908
  if (mult_cost > op_cost)
2909
    mult_cost = op_cost;
2910
 
2911
  *variant = basic_variant;
2912
  limit.cost = mult_cost;
2913
  limit.latency = mult_cost;
2914
  synth_mult (alg, val, &limit, mode);
2915
 
2916
  /* This works only if the inverted value actually fits in an
2917
     `unsigned int' */
2918
  if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2919
    {
2920
      op_cost = neg_cost[speed][mode];
2921
      if (MULT_COST_LESS (&alg->cost, mult_cost))
2922
        {
2923
          limit.cost = alg->cost.cost - op_cost;
2924
          limit.latency = alg->cost.latency - op_cost;
2925
        }
2926
      else
2927
        {
2928
          limit.cost = mult_cost - op_cost;
2929
          limit.latency = mult_cost - op_cost;
2930
        }
2931
 
2932
      synth_mult (&alg2, -val, &limit, mode);
2933
      alg2.cost.cost += op_cost;
2934
      alg2.cost.latency += op_cost;
2935
      if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2936
        *alg = alg2, *variant = negate_variant;
2937
    }
2938
 
2939
  /* This proves very useful for division-by-constant.  */
2940
  op_cost = add_cost[speed][mode];
2941
  if (MULT_COST_LESS (&alg->cost, mult_cost))
2942
    {
2943
      limit.cost = alg->cost.cost - op_cost;
2944
      limit.latency = alg->cost.latency - op_cost;
2945
    }
2946
  else
2947
    {
2948
      limit.cost = mult_cost - op_cost;
2949
      limit.latency = mult_cost - op_cost;
2950
    }
2951
 
2952
  synth_mult (&alg2, val - 1, &limit, mode);
2953
  alg2.cost.cost += op_cost;
2954
  alg2.cost.latency += op_cost;
2955
  if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2956
    *alg = alg2, *variant = add_variant;
2957
 
2958
  return MULT_COST_LESS (&alg->cost, mult_cost);
2959
}
2960
 
2961
/* A subroutine of expand_mult, used for constant multiplications.
2962
   Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2963
   convenient.  Use the shift/add sequence described by ALG and apply
2964
   the final fixup specified by VARIANT.  */
2965
 
2966
static rtx
2967
expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2968
                   rtx target, const struct algorithm *alg,
2969
                   enum mult_variant variant)
2970
{
2971
  HOST_WIDE_INT val_so_far;
2972
  rtx insn, accum, tem;
2973
  int opno;
2974
  enum machine_mode nmode;
2975
 
2976
  /* Avoid referencing memory over and over and invalid sharing
2977
     on SUBREGs.  */
2978
  op0 = force_reg (mode, op0);
2979
 
2980
  /* ACCUM starts out either as OP0 or as a zero, depending on
2981
     the first operation.  */
2982
 
2983
  if (alg->op[0] == alg_zero)
2984
    {
2985
      accum = copy_to_mode_reg (mode, const0_rtx);
2986
      val_so_far = 0;
2987
    }
2988
  else if (alg->op[0] == alg_m)
2989
    {
2990
      accum = copy_to_mode_reg (mode, op0);
2991
      val_so_far = 1;
2992
    }
2993
  else
2994
    gcc_unreachable ();
2995
 
2996
  for (opno = 1; opno < alg->ops; opno++)
2997
    {
2998
      int log = alg->log[opno];
2999
      rtx shift_subtarget = optimize ? 0 : accum;
3000
      rtx add_target
3001
        = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3002
           && !optimize)
3003
          ? target : 0;
3004
      rtx accum_target = optimize ? 0 : accum;
3005
 
3006
      switch (alg->op[opno])
3007
        {
3008
        case alg_shift:
3009
          accum = expand_shift (LSHIFT_EXPR, mode, accum,
3010
                                build_int_cst (NULL_TREE, log),
3011
                                NULL_RTX, 0);
3012
          val_so_far <<= log;
3013
          break;
3014
 
3015
        case alg_add_t_m2:
3016
          tem = expand_shift (LSHIFT_EXPR, mode, op0,
3017
                              build_int_cst (NULL_TREE, log),
3018
                              NULL_RTX, 0);
3019
          accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3020
                                 add_target ? add_target : accum_target);
3021
          val_so_far += (HOST_WIDE_INT) 1 << log;
3022
          break;
3023
 
3024
        case alg_sub_t_m2:
3025
          tem = expand_shift (LSHIFT_EXPR, mode, op0,
3026
                              build_int_cst (NULL_TREE, log),
3027
                              NULL_RTX, 0);
3028
          accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3029
                                 add_target ? add_target : accum_target);
3030
          val_so_far -= (HOST_WIDE_INT) 1 << log;
3031
          break;
3032
 
3033
        case alg_add_t2_m:
3034
          accum = expand_shift (LSHIFT_EXPR, mode, accum,
3035
                                build_int_cst (NULL_TREE, log),
3036
                                shift_subtarget,
3037
                                0);
3038
          accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3039
                                 add_target ? add_target : accum_target);
3040
          val_so_far = (val_so_far << log) + 1;
3041
          break;
3042
 
3043
        case alg_sub_t2_m:
3044
          accum = expand_shift (LSHIFT_EXPR, mode, accum,
3045
                                build_int_cst (NULL_TREE, log),
3046
                                shift_subtarget, 0);
3047
          accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3048
                                 add_target ? add_target : accum_target);
3049
          val_so_far = (val_so_far << log) - 1;
3050
          break;
3051
 
3052
        case alg_add_factor:
3053
          tem = expand_shift (LSHIFT_EXPR, mode, accum,
3054
                              build_int_cst (NULL_TREE, log),
3055
                              NULL_RTX, 0);
3056
          accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3057
                                 add_target ? add_target : accum_target);
3058
          val_so_far += val_so_far << log;
3059
          break;
3060
 
3061
        case alg_sub_factor:
3062
          tem = expand_shift (LSHIFT_EXPR, mode, accum,
3063
                              build_int_cst (NULL_TREE, log),
3064
                              NULL_RTX, 0);
3065
          accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3066
                                 (add_target
3067
                                  ? add_target : (optimize ? 0 : tem)));
3068
          val_so_far = (val_so_far << log) - val_so_far;
3069
          break;
3070
 
3071
        default:
3072
          gcc_unreachable ();
3073
        }
3074
 
3075
      /* Write a REG_EQUAL note on the last insn so that we can cse
3076
         multiplication sequences.  Note that if ACCUM is a SUBREG,
3077
         we've set the inner register and must properly indicate
3078
         that.  */
3079
 
3080
      tem = op0, nmode = mode;
3081
      if (GET_CODE (accum) == SUBREG)
3082
        {
3083
          nmode = GET_MODE (SUBREG_REG (accum));
3084
          tem = gen_lowpart (nmode, op0);
3085
        }
3086
 
3087
      insn = get_last_insn ();
3088
      set_unique_reg_note (insn, REG_EQUAL,
3089
                           gen_rtx_MULT (nmode, tem,
3090
                                         GEN_INT (val_so_far)));
3091
    }
3092
 
3093
  if (variant == negate_variant)
3094
    {
3095
      val_so_far = -val_so_far;
3096
      accum = expand_unop (mode, neg_optab, accum, target, 0);
3097
    }
3098
  else if (variant == add_variant)
3099
    {
3100
      val_so_far = val_so_far + 1;
3101
      accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3102
    }
3103
 
3104
  /* Compare only the bits of val and val_so_far that are significant
3105
     in the result mode, to avoid sign-/zero-extension confusion.  */
3106
  val &= GET_MODE_MASK (mode);
3107
  val_so_far &= GET_MODE_MASK (mode);
3108
  gcc_assert (val == val_so_far);
3109
 
3110
  return accum;
3111
}
3112
 
3113
/* Perform a multiplication and return an rtx for the result.
3114
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3115
   TARGET is a suggestion for where to store the result (an rtx).
3116
 
3117
   We check specially for a constant integer as OP1.
3118
   If you want this check for OP0 as well, then before calling
3119
   you should swap the two operands if OP0 would be constant.  */
3120
 
3121
rtx
3122
expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3123
             int unsignedp)
3124
{
3125
  enum mult_variant variant;
3126
  struct algorithm algorithm;
3127
  int max_cost;
3128
  bool speed = optimize_insn_for_speed_p ();
3129
 
3130
  /* Handling const0_rtx here allows us to use zero as a rogue value for
3131
     coeff below.  */
3132
  if (op1 == const0_rtx)
3133
    return const0_rtx;
3134
  if (op1 == const1_rtx)
3135
    return op0;
3136
  if (op1 == constm1_rtx)
3137
    return expand_unop (mode,
3138
                        GET_MODE_CLASS (mode) == MODE_INT
3139
                        && !unsignedp && flag_trapv
3140
                        ? negv_optab : neg_optab,
3141
                        op0, target, 0);
3142
 
3143
  /* These are the operations that are potentially turned into a sequence
3144
     of shifts and additions.  */
3145
  if (SCALAR_INT_MODE_P (mode)
3146
      && (unsignedp || !flag_trapv))
3147
    {
3148
      HOST_WIDE_INT coeff = 0;
3149
      rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3150
 
3151
      /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3152
         less than or equal in size to `unsigned int' this doesn't matter.
3153
         If the mode is larger than `unsigned int', then synth_mult works
3154
         only if the constant value exactly fits in an `unsigned int' without
3155
         any truncation.  This means that multiplying by negative values does
3156
         not work; results are off by 2^32 on a 32 bit machine.  */
3157
 
3158
      if (CONST_INT_P (op1))
3159
        {
3160
          /* Attempt to handle multiplication of DImode values by negative
3161
             coefficients, by performing the multiplication by a positive
3162
             multiplier and then inverting the result.  */
3163
          if (INTVAL (op1) < 0
3164
              && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3165
            {
3166
              /* Its safe to use -INTVAL (op1) even for INT_MIN, as the
3167
                 result is interpreted as an unsigned coefficient.
3168
                 Exclude cost of op0 from max_cost to match the cost
3169
                 calculation of the synth_mult.  */
3170
              max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed)
3171
                         - neg_cost[speed][mode];
3172
              if (max_cost > 0
3173
                  && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3174
                                          &variant, max_cost))
3175
                {
3176
                  rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3177
                                                NULL_RTX, &algorithm,
3178
                                                variant);
3179
                  return expand_unop (mode, neg_optab, temp, target, 0);
3180
                }
3181
            }
3182
          else coeff = INTVAL (op1);
3183
        }
3184
      else if (GET_CODE (op1) == CONST_DOUBLE)
3185
        {
3186
          /* If we are multiplying in DImode, it may still be a win
3187
             to try to work with shifts and adds.  */
3188
          if (CONST_DOUBLE_HIGH (op1) == 0
3189
              && CONST_DOUBLE_LOW (op1) > 0)
3190
            coeff = CONST_DOUBLE_LOW (op1);
3191
          else if (CONST_DOUBLE_LOW (op1) == 0
3192
                   && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3193
            {
3194
              int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3195
                          + HOST_BITS_PER_WIDE_INT;
3196
              return expand_shift (LSHIFT_EXPR, mode, op0,
3197
                                   build_int_cst (NULL_TREE, shift),
3198
                                   target, unsignedp);
3199
            }
3200
        }
3201
 
3202
      /* We used to test optimize here, on the grounds that it's better to
3203
         produce a smaller program when -O is not used.  But this causes
3204
         such a terrible slowdown sometimes that it seems better to always
3205
         use synth_mult.  */
3206
      if (coeff != 0)
3207
        {
3208
          /* Special case powers of two.  */
3209
          if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3210
            return expand_shift (LSHIFT_EXPR, mode, op0,
3211
                                 build_int_cst (NULL_TREE, floor_log2 (coeff)),
3212
                                 target, unsignedp);
3213
 
3214
          /* Exclude cost of op0 from max_cost to match the cost
3215
             calculation of the synth_mult.  */
3216
          max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed);
3217
          if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3218
                                   max_cost))
3219
            return expand_mult_const (mode, op0, coeff, target,
3220
                                      &algorithm, variant);
3221
        }
3222
    }
3223
 
3224
  if (GET_CODE (op0) == CONST_DOUBLE)
3225
    {
3226
      rtx temp = op0;
3227
      op0 = op1;
3228
      op1 = temp;
3229
    }
3230
 
3231
  /* Expand x*2.0 as x+x.  */
3232
  if (GET_CODE (op1) == CONST_DOUBLE
3233
      && SCALAR_FLOAT_MODE_P (mode))
3234
    {
3235
      REAL_VALUE_TYPE d;
3236
      REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3237
 
3238
      if (REAL_VALUES_EQUAL (d, dconst2))
3239
        {
3240
          op0 = force_reg (GET_MODE (op0), op0);
3241
          return expand_binop (mode, add_optab, op0, op0,
3242
                               target, unsignedp, OPTAB_LIB_WIDEN);
3243
        }
3244
    }
3245
 
3246
  /* This used to use umul_optab if unsigned, but for non-widening multiply
3247
     there is no difference between signed and unsigned.  */
3248
  op0 = expand_binop (mode,
3249
                      ! unsignedp
3250
                      && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3251
                      ? smulv_optab : smul_optab,
3252
                      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3253
  gcc_assert (op0);
3254
  return op0;
3255
}
3256
 
3257
/* Return the smallest n such that 2**n >= X.  */
3258
 
3259
int
3260
ceil_log2 (unsigned HOST_WIDE_INT x)
3261
{
3262
  return floor_log2 (x - 1) + 1;
3263
}
3264
 
3265
/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3266
   replace division by D, and put the least significant N bits of the result
3267
   in *MULTIPLIER_PTR and return the most significant bit.
3268
 
3269
   The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3270
   needed precision is in PRECISION (should be <= N).
3271
 
3272
   PRECISION should be as small as possible so this function can choose
3273
   multiplier more freely.
3274
 
3275
   The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3276
   is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3277
 
3278
   Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3279
   where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3280
 
3281
static
3282
unsigned HOST_WIDE_INT
3283
choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3284
                   rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3285
{
3286
  HOST_WIDE_INT mhigh_hi, mlow_hi;
3287
  unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3288
  int lgup, post_shift;
3289
  int pow, pow2;
3290
  unsigned HOST_WIDE_INT nl, dummy1;
3291
  HOST_WIDE_INT nh, dummy2;
3292
 
3293
  /* lgup = ceil(log2(divisor)); */
3294
  lgup = ceil_log2 (d);
3295
 
3296
  gcc_assert (lgup <= n);
3297
 
3298
  pow = n + lgup;
3299
  pow2 = n + lgup - precision;
3300
 
3301
  /* We could handle this with some effort, but this case is much
3302
     better handled directly with a scc insn, so rely on caller using
3303
     that.  */
3304
  gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3305
 
3306
  /* mlow = 2^(N + lgup)/d */
3307
 if (pow >= HOST_BITS_PER_WIDE_INT)
3308
    {
3309
      nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3310
      nl = 0;
3311
    }
3312
  else
3313
    {
3314
      nh = 0;
3315
      nl = (unsigned HOST_WIDE_INT) 1 << pow;
3316
    }
3317
  div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3318
                        &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3319
 
3320
  /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
3321
  if (pow2 >= HOST_BITS_PER_WIDE_INT)
3322
    nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3323
  else
3324
    nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3325
  div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3326
                        &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3327
 
3328
  gcc_assert (!mhigh_hi || nh - d < d);
3329
  gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3330
  /* Assert that mlow < mhigh.  */
3331
  gcc_assert (mlow_hi < mhigh_hi
3332
              || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3333
 
3334
  /* If precision == N, then mlow, mhigh exceed 2^N
3335
     (but they do not exceed 2^(N+1)).  */
3336
 
3337
  /* Reduce to lowest terms.  */
3338
  for (post_shift = lgup; post_shift > 0; post_shift--)
3339
    {
3340
      unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3341
      unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3342
      if (ml_lo >= mh_lo)
3343
        break;
3344
 
3345
      mlow_hi = 0;
3346
      mlow_lo = ml_lo;
3347
      mhigh_hi = 0;
3348
      mhigh_lo = mh_lo;
3349
    }
3350
 
3351
  *post_shift_ptr = post_shift;
3352
  *lgup_ptr = lgup;
3353
  if (n < HOST_BITS_PER_WIDE_INT)
3354
    {
3355
      unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3356
      *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3357
      return mhigh_lo >= mask;
3358
    }
3359
  else
3360
    {
3361
      *multiplier_ptr = GEN_INT (mhigh_lo);
3362
      return mhigh_hi;
3363
    }
3364
}
3365
 
3366
/* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3367
   congruent to 1 (mod 2**N).  */
3368
 
3369
static unsigned HOST_WIDE_INT
3370
invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3371
{
3372
  /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3373
 
3374
  /* The algorithm notes that the choice y = x satisfies
3375
     x*y == 1 mod 2^3, since x is assumed odd.
3376
     Each iteration doubles the number of bits of significance in y.  */
3377
 
3378
  unsigned HOST_WIDE_INT mask;
3379
  unsigned HOST_WIDE_INT y = x;
3380
  int nbit = 3;
3381
 
3382
  mask = (n == HOST_BITS_PER_WIDE_INT
3383
          ? ~(unsigned HOST_WIDE_INT) 0
3384
          : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3385
 
3386
  while (nbit < n)
3387
    {
3388
      y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3389
      nbit *= 2;
3390
    }
3391
  return y;
3392
}
3393
 
3394
/* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3395
   flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3396
   product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3397
   to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3398
   become signed.
3399
 
3400
   The result is put in TARGET if that is convenient.
3401
 
3402
   MODE is the mode of operation.  */
3403
 
3404
rtx
3405
expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3406
                             rtx op1, rtx target, int unsignedp)
3407
{
3408
  rtx tem;
3409
  enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3410
 
3411
  tem = expand_shift (RSHIFT_EXPR, mode, op0,
3412
                      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3413
                      NULL_RTX, 0);
3414
  tem = expand_and (mode, tem, op1, NULL_RTX);
3415
  adj_operand
3416
    = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3417
                     adj_operand);
3418
 
3419
  tem = expand_shift (RSHIFT_EXPR, mode, op1,
3420
                      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3421
                      NULL_RTX, 0);
3422
  tem = expand_and (mode, tem, op0, NULL_RTX);
3423
  target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3424
                          target);
3425
 
3426
  return target;
3427
}
3428
 
3429
/* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
3430
 
3431
static rtx
3432
extract_high_half (enum machine_mode mode, rtx op)
3433
{
3434
  enum machine_mode wider_mode;
3435
 
3436
  if (mode == word_mode)
3437
    return gen_highpart (mode, op);
3438
 
3439
  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3440
 
3441
  wider_mode = GET_MODE_WIDER_MODE (mode);
3442
  op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3443
                     build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3444
  return convert_modes (mode, wider_mode, op, 0);
3445
}
3446
 
3447
/* Like expand_mult_highpart, but only consider using a multiplication
3448
   optab.  OP1 is an rtx for the constant operand.  */
3449
 
3450
static rtx
3451
expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3452
                            rtx target, int unsignedp, int max_cost)
3453
{
3454
  rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3455
  enum machine_mode wider_mode;
3456
  optab moptab;
3457
  rtx tem;
3458
  int size;
3459
  bool speed = optimize_insn_for_speed_p ();
3460
 
3461
  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3462
 
3463
  wider_mode = GET_MODE_WIDER_MODE (mode);
3464
  size = GET_MODE_BITSIZE (mode);
3465
 
3466
  /* Firstly, try using a multiplication insn that only generates the needed
3467
     high part of the product, and in the sign flavor of unsignedp.  */
3468
  if (mul_highpart_cost[speed][mode] < max_cost)
3469
    {
3470
      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3471
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3472
                          unsignedp, OPTAB_DIRECT);
3473
      if (tem)
3474
        return tem;
3475
    }
3476
 
3477
  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3478
     Need to adjust the result after the multiplication.  */
3479
  if (size - 1 < BITS_PER_WORD
3480
      && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3481
          + 4 * add_cost[speed][mode] < max_cost))
3482
    {
3483
      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3484
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3485
                          unsignedp, OPTAB_DIRECT);
3486
      if (tem)
3487
        /* We used the wrong signedness.  Adjust the result.  */
3488
        return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3489
                                            tem, unsignedp);
3490
    }
3491
 
3492
  /* Try widening multiplication.  */
3493
  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3494
  if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3495
      && mul_widen_cost[speed][wider_mode] < max_cost)
3496
    {
3497
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3498
                          unsignedp, OPTAB_WIDEN);
3499
      if (tem)
3500
        return extract_high_half (mode, tem);
3501
    }
3502
 
3503
  /* Try widening the mode and perform a non-widening multiplication.  */
3504
  if (optab_handler (smul_optab, wider_mode)->insn_code != CODE_FOR_nothing
3505
      && size - 1 < BITS_PER_WORD
3506
      && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3507
    {
3508
      rtx insns, wop0, wop1;
3509
 
3510
      /* We need to widen the operands, for example to ensure the
3511
         constant multiplier is correctly sign or zero extended.
3512
         Use a sequence to clean-up any instructions emitted by
3513
         the conversions if things don't work out.  */
3514
      start_sequence ();
3515
      wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3516
      wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3517
      tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3518
                          unsignedp, OPTAB_WIDEN);
3519
      insns = get_insns ();
3520
      end_sequence ();
3521
 
3522
      if (tem)
3523
        {
3524
          emit_insn (insns);
3525
          return extract_high_half (mode, tem);
3526
        }
3527
    }
3528
 
3529
  /* Try widening multiplication of opposite signedness, and adjust.  */
3530
  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3531
  if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3532
      && size - 1 < BITS_PER_WORD
3533
      && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3534
          + 4 * add_cost[speed][mode] < max_cost))
3535
    {
3536
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3537
                          NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3538
      if (tem != 0)
3539
        {
3540
          tem = extract_high_half (mode, tem);
3541
          /* We used the wrong signedness.  Adjust the result.  */
3542
          return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3543
                                              target, unsignedp);
3544
        }
3545
    }
3546
 
3547
  return 0;
3548
}
3549
 
3550
/* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3551
   putting the high half of the result in TARGET if that is convenient,
3552
   and return where the result is.  If the operation can not be performed,
3553
 
3554
 
3555
   MODE is the mode of operation and result.
3556
 
3557
   UNSIGNEDP nonzero means unsigned multiply.
3558
 
3559
   MAX_COST is the total allowed cost for the expanded RTL.  */
3560
 
3561
static rtx
3562
expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3563
                      rtx target, int unsignedp, int max_cost)
3564
{
3565
  enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3566
  unsigned HOST_WIDE_INT cnst1;
3567
  int extra_cost;
3568
  bool sign_adjust = false;
3569
  enum mult_variant variant;
3570
  struct algorithm alg;
3571
  rtx tem;
3572
  bool speed = optimize_insn_for_speed_p ();
3573
 
3574
  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3575
  /* We can't support modes wider than HOST_BITS_PER_INT.  */
3576
  gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3577
 
3578
  cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3579
 
3580
  /* We can't optimize modes wider than BITS_PER_WORD.
3581
     ??? We might be able to perform double-word arithmetic if
3582
     mode == word_mode, however all the cost calculations in
3583
     synth_mult etc. assume single-word operations.  */
3584
  if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3585
    return expand_mult_highpart_optab (mode, op0, op1, target,
3586
                                       unsignedp, max_cost);
3587
 
3588
  extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3589
 
3590
  /* Check whether we try to multiply by a negative constant.  */
3591
  if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3592
    {
3593
      sign_adjust = true;
3594
      extra_cost += add_cost[speed][mode];
3595
    }
3596
 
3597
  /* See whether shift/add multiplication is cheap enough.  */
3598
  if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3599
                           max_cost - extra_cost))
3600
    {
3601
      /* See whether the specialized multiplication optabs are
3602
         cheaper than the shift/add version.  */
3603
      tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3604
                                        alg.cost.cost + extra_cost);
3605
      if (tem)
3606
        return tem;
3607
 
3608
      tem = convert_to_mode (wider_mode, op0, unsignedp);
3609
      tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3610
      tem = extract_high_half (mode, tem);
3611
 
3612
      /* Adjust result for signedness.  */
3613
      if (sign_adjust)
3614
        tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3615
 
3616
      return tem;
3617
    }
3618
  return expand_mult_highpart_optab (mode, op0, op1, target,
3619
                                     unsignedp, max_cost);
3620
}
3621
 
3622
 
3623
/* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3624
 
3625
static rtx
3626
expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3627
{
3628
  unsigned HOST_WIDE_INT masklow, maskhigh;
3629
  rtx result, temp, shift, label;
3630
  int logd;
3631
 
3632
  logd = floor_log2 (d);
3633
  result = gen_reg_rtx (mode);
3634
 
3635
  /* Avoid conditional branches when they're expensive.  */
3636
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3637
      && optimize_insn_for_speed_p ())
3638
    {
3639
      rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3640
                                      mode, 0, -1);
3641
      if (signmask)
3642
        {
3643
          signmask = force_reg (mode, signmask);
3644
          masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3645
          shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3646
 
3647
          /* Use the rtx_cost of a LSHIFTRT instruction to determine
3648
             which instruction sequence to use.  If logical right shifts
3649
             are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3650
             use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3651
 
3652
          temp = gen_rtx_LSHIFTRT (mode, result, shift);
3653
          if (optab_handler (lshr_optab, mode)->insn_code == CODE_FOR_nothing
3654
              || rtx_cost (temp, SET, optimize_insn_for_speed_p ()) > COSTS_N_INSNS (2))
3655
            {
3656
              temp = expand_binop (mode, xor_optab, op0, signmask,
3657
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3658
              temp = expand_binop (mode, sub_optab, temp, signmask,
3659
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3660
              temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3661
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3662
              temp = expand_binop (mode, xor_optab, temp, signmask,
3663
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3664
              temp = expand_binop (mode, sub_optab, temp, signmask,
3665
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3666
            }
3667
          else
3668
            {
3669
              signmask = expand_binop (mode, lshr_optab, signmask, shift,
3670
                                       NULL_RTX, 1, OPTAB_LIB_WIDEN);
3671
              signmask = force_reg (mode, signmask);
3672
 
3673
              temp = expand_binop (mode, add_optab, op0, signmask,
3674
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3675
              temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3676
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3677
              temp = expand_binop (mode, sub_optab, temp, signmask,
3678
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3679
            }
3680
          return temp;
3681
        }
3682
    }
3683
 
3684
  /* Mask contains the mode's signbit and the significant bits of the
3685
     modulus.  By including the signbit in the operation, many targets
3686
     can avoid an explicit compare operation in the following comparison
3687
     against zero.  */
3688
 
3689
  masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3690
  if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3691
    {
3692
      masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3693
      maskhigh = -1;
3694
    }
3695
  else
3696
    maskhigh = (HOST_WIDE_INT) -1
3697
                 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3698
 
3699
  temp = expand_binop (mode, and_optab, op0,
3700
                       immed_double_const (masklow, maskhigh, mode),
3701
                       result, 1, OPTAB_LIB_WIDEN);
3702
  if (temp != result)
3703
    emit_move_insn (result, temp);
3704
 
3705
  label = gen_label_rtx ();
3706
  do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3707
 
3708
  temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3709
                       0, OPTAB_LIB_WIDEN);
3710
  masklow = (HOST_WIDE_INT) -1 << logd;
3711
  maskhigh = -1;
3712
  temp = expand_binop (mode, ior_optab, temp,
3713
                       immed_double_const (masklow, maskhigh, mode),
3714
                       result, 1, OPTAB_LIB_WIDEN);
3715
  temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3716
                       0, OPTAB_LIB_WIDEN);
3717
  if (temp != result)
3718
    emit_move_insn (result, temp);
3719
  emit_label (label);
3720
  return result;
3721
}
3722
 
3723
/* Expand signed division of OP0 by a power of two D in mode MODE.
3724
   This routine is only called for positive values of D.  */
3725
 
3726
static rtx
3727
expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3728
{
3729
  rtx temp, label;
3730
  tree shift;
3731
  int logd;
3732
 
3733
  logd = floor_log2 (d);
3734
  shift = build_int_cst (NULL_TREE, logd);
3735
 
3736
  if (d == 2
3737
      && BRANCH_COST (optimize_insn_for_speed_p (),
3738
                      false) >= 1)
3739
    {
3740
      temp = gen_reg_rtx (mode);
3741
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3742
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3743
                           0, OPTAB_LIB_WIDEN);
3744
      return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3745
    }
3746
 
3747
#ifdef HAVE_conditional_move
3748
  if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3749
      >= 2)
3750
    {
3751
      rtx temp2;
3752
 
3753
      /* ??? emit_conditional_move forces a stack adjustment via
3754
         compare_from_rtx so, if the sequence is discarded, it will
3755
         be lost.  Do it now instead.  */
3756
      do_pending_stack_adjust ();
3757
 
3758
      start_sequence ();
3759
      temp2 = copy_to_mode_reg (mode, op0);
3760
      temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3761
                           NULL_RTX, 0, OPTAB_LIB_WIDEN);
3762
      temp = force_reg (mode, temp);
3763
 
3764
      /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3765
      temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3766
                                     mode, temp, temp2, mode, 0);
3767
      if (temp2)
3768
        {
3769
          rtx seq = get_insns ();
3770
          end_sequence ();
3771
          emit_insn (seq);
3772
          return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3773
        }
3774
      end_sequence ();
3775
    }
3776
#endif
3777
 
3778
  if (BRANCH_COST (optimize_insn_for_speed_p (),
3779
                   false) >= 2)
3780
    {
3781
      int ushift = GET_MODE_BITSIZE (mode) - logd;
3782
 
3783
      temp = gen_reg_rtx (mode);
3784
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3785
      if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3786
        temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3787
                             NULL_RTX, 0, OPTAB_LIB_WIDEN);
3788
      else
3789
        temp = expand_shift (RSHIFT_EXPR, mode, temp,
3790
                             build_int_cst (NULL_TREE, ushift),
3791
                             NULL_RTX, 1);
3792
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3793
                           0, OPTAB_LIB_WIDEN);
3794
      return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3795
    }
3796
 
3797
  label = gen_label_rtx ();
3798
  temp = copy_to_mode_reg (mode, op0);
3799
  do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3800
  expand_inc (temp, GEN_INT (d - 1));
3801
  emit_label (label);
3802
  return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3803
}
3804
 
3805
/* Emit the code to divide OP0 by OP1, putting the result in TARGET
3806
   if that is convenient, and returning where the result is.
3807
   You may request either the quotient or the remainder as the result;
3808
   specify REM_FLAG nonzero to get the remainder.
3809
 
3810
   CODE is the expression code for which kind of division this is;
3811
   it controls how rounding is done.  MODE is the machine mode to use.
3812
   UNSIGNEDP nonzero means do unsigned division.  */
3813
 
3814
/* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3815
   and then correct it by or'ing in missing high bits
3816
   if result of ANDI is nonzero.
3817
   For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3818
   This could optimize to a bfexts instruction.
3819
   But C doesn't use these operations, so their optimizations are
3820
   left for later.  */
3821
/* ??? For modulo, we don't actually need the highpart of the first product,
3822
   the low part will do nicely.  And for small divisors, the second multiply
3823
   can also be a low-part only multiply or even be completely left out.
3824
   E.g. to calculate the remainder of a division by 3 with a 32 bit
3825
   multiply, multiply with 0x55555556 and extract the upper two bits;
3826
   the result is exact for inputs up to 0x1fffffff.
3827
   The input range can be reduced by using cross-sum rules.
3828
   For odd divisors >= 3, the following table gives right shift counts
3829
   so that if a number is shifted by an integer multiple of the given
3830
   amount, the remainder stays the same:
3831
   2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3832
   14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3833
   0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3834
   20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3835
   0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3836
 
3837
   Cross-sum rules for even numbers can be derived by leaving as many bits
3838
   to the right alone as the divisor has zeros to the right.
3839
   E.g. if x is an unsigned 32 bit number:
3840
   (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3841
   */
3842
 
3843
rtx
3844
expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3845
               rtx op0, rtx op1, rtx target, int unsignedp)
3846
{
3847
  enum machine_mode compute_mode;
3848
  rtx tquotient;
3849
  rtx quotient = 0, remainder = 0;
3850
  rtx last;
3851
  int size;
3852
  rtx insn, set;
3853
  optab optab1, optab2;
3854
  int op1_is_constant, op1_is_pow2 = 0;
3855
  int max_cost, extra_cost;
3856
  static HOST_WIDE_INT last_div_const = 0;
3857
  static HOST_WIDE_INT ext_op1;
3858
  bool speed = optimize_insn_for_speed_p ();
3859
 
3860
  op1_is_constant = CONST_INT_P (op1);
3861
  if (op1_is_constant)
3862
    {
3863
      ext_op1 = INTVAL (op1);
3864
      if (unsignedp)
3865
        ext_op1 &= GET_MODE_MASK (mode);
3866
      op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3867
                     || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3868
    }
3869
 
3870
  /*
3871
     This is the structure of expand_divmod:
3872
 
3873
     First comes code to fix up the operands so we can perform the operations
3874
     correctly and efficiently.
3875
 
3876
     Second comes a switch statement with code specific for each rounding mode.
3877
     For some special operands this code emits all RTL for the desired
3878
     operation, for other cases, it generates only a quotient and stores it in
3879
     QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3880
     to indicate that it has not done anything.
3881
 
3882
     Last comes code that finishes the operation.  If QUOTIENT is set and
3883
     REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3884
     QUOTIENT is not set, it is computed using trunc rounding.
3885
 
3886
     We try to generate special code for division and remainder when OP1 is a
3887
     constant.  If |OP1| = 2**n we can use shifts and some other fast
3888
     operations.  For other values of OP1, we compute a carefully selected
3889
     fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3890
     by m.
3891
 
3892
     In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3893
     half of the product.  Different strategies for generating the product are
3894
     implemented in expand_mult_highpart.
3895
 
3896
     If what we actually want is the remainder, we generate that by another
3897
     by-constant multiplication and a subtraction.  */
3898
 
3899
  /* We shouldn't be called with OP1 == const1_rtx, but some of the
3900
     code below will malfunction if we are, so check here and handle
3901
     the special case if so.  */
3902
  if (op1 == const1_rtx)
3903
    return rem_flag ? const0_rtx : op0;
3904
 
3905
    /* When dividing by -1, we could get an overflow.
3906
     negv_optab can handle overflows.  */
3907
  if (! unsignedp && op1 == constm1_rtx)
3908
    {
3909
      if (rem_flag)
3910
        return const0_rtx;
3911
      return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3912
                          ? negv_optab : neg_optab, op0, target, 0);
3913
    }
3914
 
3915
  if (target
3916
      /* Don't use the function value register as a target
3917
         since we have to read it as well as write it,
3918
         and function-inlining gets confused by this.  */
3919
      && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3920
          /* Don't clobber an operand while doing a multi-step calculation.  */
3921
          || ((rem_flag || op1_is_constant)
3922
              && (reg_mentioned_p (target, op0)
3923
                  || (MEM_P (op0) && MEM_P (target))))
3924
          || reg_mentioned_p (target, op1)
3925
          || (MEM_P (op1) && MEM_P (target))))
3926
    target = 0;
3927
 
3928
  /* Get the mode in which to perform this computation.  Normally it will
3929
     be MODE, but sometimes we can't do the desired operation in MODE.
3930
     If so, pick a wider mode in which we can do the operation.  Convert
3931
     to that mode at the start to avoid repeated conversions.
3932
 
3933
     First see what operations we need.  These depend on the expression
3934
     we are evaluating.  (We assume that divxx3 insns exist under the
3935
     same conditions that modxx3 insns and that these insns don't normally
3936
     fail.  If these assumptions are not correct, we may generate less
3937
     efficient code in some cases.)
3938
 
3939
     Then see if we find a mode in which we can open-code that operation
3940
     (either a division, modulus, or shift).  Finally, check for the smallest
3941
     mode for which we can do the operation with a library call.  */
3942
 
3943
  /* We might want to refine this now that we have division-by-constant
3944
     optimization.  Since expand_mult_highpart tries so many variants, it is
3945
     not straightforward to generalize this.  Maybe we should make an array
3946
     of possible modes in init_expmed?  Save this for GCC 2.7.  */
3947
 
3948
  optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3949
            ? (unsignedp ? lshr_optab : ashr_optab)
3950
            : (unsignedp ? udiv_optab : sdiv_optab));
3951
  optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3952
            ? optab1
3953
            : (unsignedp ? udivmod_optab : sdivmod_optab));
3954
 
3955
  for (compute_mode = mode; compute_mode != VOIDmode;
3956
       compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3957
    if (optab_handler (optab1, compute_mode)->insn_code != CODE_FOR_nothing
3958
        || optab_handler (optab2, compute_mode)->insn_code != CODE_FOR_nothing)
3959
      break;
3960
 
3961
  if (compute_mode == VOIDmode)
3962
    for (compute_mode = mode; compute_mode != VOIDmode;
3963
         compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3964
      if (optab_libfunc (optab1, compute_mode)
3965
          || optab_libfunc (optab2, compute_mode))
3966
        break;
3967
 
3968
  /* If we still couldn't find a mode, use MODE, but expand_binop will
3969
     probably die.  */
3970
  if (compute_mode == VOIDmode)
3971
    compute_mode = mode;
3972
 
3973
  if (target && GET_MODE (target) == compute_mode)
3974
    tquotient = target;
3975
  else
3976
    tquotient = gen_reg_rtx (compute_mode);
3977
 
3978
  size = GET_MODE_BITSIZE (compute_mode);
3979
#if 0
3980
  /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3981
     (mode), and thereby get better code when OP1 is a constant.  Do that
3982
     later.  It will require going over all usages of SIZE below.  */
3983
  size = GET_MODE_BITSIZE (mode);
3984
#endif
3985
 
3986
  /* Only deduct something for a REM if the last divide done was
3987
     for a different constant.   Then set the constant of the last
3988
     divide.  */
3989
  max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
3990
  if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3991
                     && INTVAL (op1) == last_div_const))
3992
    max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
3993
 
3994
  last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3995
 
3996
  /* Now convert to the best mode to use.  */
3997
  if (compute_mode != mode)
3998
    {
3999
      op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4000
      op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4001
 
4002
      /* convert_modes may have placed op1 into a register, so we
4003
         must recompute the following.  */
4004
      op1_is_constant = CONST_INT_P (op1);
4005
      op1_is_pow2 = (op1_is_constant
4006
                     && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4007
                          || (! unsignedp
4008
                              && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
4009
    }
4010
 
4011
  /* If one of the operands is a volatile MEM, copy it into a register.  */
4012
 
4013
  if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4014
    op0 = force_reg (compute_mode, op0);
4015
  if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4016
    op1 = force_reg (compute_mode, op1);
4017
 
4018
  /* If we need the remainder or if OP1 is constant, we need to
4019
     put OP0 in a register in case it has any queued subexpressions.  */
4020
  if (rem_flag || op1_is_constant)
4021
    op0 = force_reg (compute_mode, op0);
4022
 
4023
  last = get_last_insn ();
4024
 
4025
  /* Promote floor rounding to trunc rounding for unsigned operations.  */
4026
  if (unsignedp)
4027
    {
4028
      if (code == FLOOR_DIV_EXPR)
4029
        code = TRUNC_DIV_EXPR;
4030
      if (code == FLOOR_MOD_EXPR)
4031
        code = TRUNC_MOD_EXPR;
4032
      if (code == EXACT_DIV_EXPR && op1_is_pow2)
4033
        code = TRUNC_DIV_EXPR;
4034
    }
4035
 
4036
  if (op1 != const0_rtx)
4037
    switch (code)
4038
      {
4039
      case TRUNC_MOD_EXPR:
4040
      case TRUNC_DIV_EXPR:
4041
        if (op1_is_constant)
4042
          {
4043
            if (unsignedp)
4044
              {
4045
                unsigned HOST_WIDE_INT mh;
4046
                int pre_shift, post_shift;
4047
                int dummy;
4048
                rtx ml;
4049
                unsigned HOST_WIDE_INT d = (INTVAL (op1)
4050
                                            & GET_MODE_MASK (compute_mode));
4051
 
4052
                if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4053
                  {
4054
                    pre_shift = floor_log2 (d);
4055
                    if (rem_flag)
4056
                      {
4057
                        remainder
4058
                          = expand_binop (compute_mode, and_optab, op0,
4059
                                          GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4060
                                          remainder, 1,
4061
                                          OPTAB_LIB_WIDEN);
4062
                        if (remainder)
4063
                          return gen_lowpart (mode, remainder);
4064
                      }
4065
                    quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4066
                                             build_int_cst (NULL_TREE,
4067
                                                            pre_shift),
4068
                                             tquotient, 1);
4069
                  }
4070
                else if (size <= HOST_BITS_PER_WIDE_INT)
4071
                  {
4072
                    if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4073
                      {
4074
                        /* Most significant bit of divisor is set; emit an scc
4075
                           insn.  */
4076
                        quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4077
                                                          compute_mode, 1, 1);
4078
                      }
4079
                    else
4080
                      {
4081
                        /* Find a suitable multiplier and right shift count
4082
                           instead of multiplying with D.  */
4083
 
4084
                        mh = choose_multiplier (d, size, size,
4085
                                                &ml, &post_shift, &dummy);
4086
 
4087
                        /* If the suggested multiplier is more than SIZE bits,
4088
                           we can do better for even divisors, using an
4089
                           initial right shift.  */
4090
                        if (mh != 0 && (d & 1) == 0)
4091
                          {
4092
                            pre_shift = floor_log2 (d & -d);
4093
                            mh = choose_multiplier (d >> pre_shift, size,
4094
                                                    size - pre_shift,
4095
                                                    &ml, &post_shift, &dummy);
4096
                            gcc_assert (!mh);
4097
                          }
4098
                        else
4099
                          pre_shift = 0;
4100
 
4101
                        if (mh != 0)
4102
                          {
4103
                            rtx t1, t2, t3, t4;
4104
 
4105
                            if (post_shift - 1 >= BITS_PER_WORD)
4106
                              goto fail1;
4107
 
4108
                            extra_cost
4109
                              = (shift_cost[speed][compute_mode][post_shift - 1]
4110
                                 + shift_cost[speed][compute_mode][1]
4111
                                 + 2 * add_cost[speed][compute_mode]);
4112
                            t1 = expand_mult_highpart (compute_mode, op0, ml,
4113
                                                       NULL_RTX, 1,
4114
                                                       max_cost - extra_cost);
4115
                            if (t1 == 0)
4116
                              goto fail1;
4117
                            t2 = force_operand (gen_rtx_MINUS (compute_mode,
4118
                                                               op0, t1),
4119
                                                NULL_RTX);
4120
                            t3 = expand_shift
4121
                              (RSHIFT_EXPR, compute_mode, t2,
4122
                               build_int_cst (NULL_TREE, 1),
4123
                               NULL_RTX,1);
4124
                            t4 = force_operand (gen_rtx_PLUS (compute_mode,
4125
                                                              t1, t3),
4126
                                                NULL_RTX);
4127
                            quotient = expand_shift
4128
                              (RSHIFT_EXPR, compute_mode, t4,
4129
                               build_int_cst (NULL_TREE, post_shift - 1),
4130
                               tquotient, 1);
4131
                          }
4132
                        else
4133
                          {
4134
                            rtx t1, t2;
4135
 
4136
                            if (pre_shift >= BITS_PER_WORD
4137
                                || post_shift >= BITS_PER_WORD)
4138
                              goto fail1;
4139
 
4140
                            t1 = expand_shift
4141
                              (RSHIFT_EXPR, compute_mode, op0,
4142
                               build_int_cst (NULL_TREE, pre_shift),
4143
                               NULL_RTX, 1);
4144
                            extra_cost
4145
                              = (shift_cost[speed][compute_mode][pre_shift]
4146
                                 + shift_cost[speed][compute_mode][post_shift]);
4147
                            t2 = expand_mult_highpart (compute_mode, t1, ml,
4148
                                                       NULL_RTX, 1,
4149
                                                       max_cost - extra_cost);
4150
                            if (t2 == 0)
4151
                              goto fail1;
4152
                            quotient = expand_shift
4153
                              (RSHIFT_EXPR, compute_mode, t2,
4154
                               build_int_cst (NULL_TREE, post_shift),
4155
                               tquotient, 1);
4156
                          }
4157
                      }
4158
                  }
4159
                else            /* Too wide mode to use tricky code */
4160
                  break;
4161
 
4162
                insn = get_last_insn ();
4163
                if (insn != last
4164
                    && (set = single_set (insn)) != 0
4165
                    && SET_DEST (set) == quotient)
4166
                  set_unique_reg_note (insn,
4167
                                       REG_EQUAL,
4168
                                       gen_rtx_UDIV (compute_mode, op0, op1));
4169
              }
4170
            else                /* TRUNC_DIV, signed */
4171
              {
4172
                unsigned HOST_WIDE_INT ml;
4173
                int lgup, post_shift;
4174
                rtx mlr;
4175
                HOST_WIDE_INT d = INTVAL (op1);
4176
                unsigned HOST_WIDE_INT abs_d;
4177
 
4178
                /* Since d might be INT_MIN, we have to cast to
4179
                   unsigned HOST_WIDE_INT before negating to avoid
4180
                   undefined signed overflow.  */
4181
                abs_d = (d >= 0
4182
                         ? (unsigned HOST_WIDE_INT) d
4183
                         : - (unsigned HOST_WIDE_INT) d);
4184
 
4185
                /* n rem d = n rem -d */
4186
                if (rem_flag && d < 0)
4187
                  {
4188
                    d = abs_d;
4189
                    op1 = gen_int_mode (abs_d, compute_mode);
4190
                  }
4191
 
4192
                if (d == 1)
4193
                  quotient = op0;
4194
                else if (d == -1)
4195
                  quotient = expand_unop (compute_mode, neg_optab, op0,
4196
                                          tquotient, 0);
4197
                else if (HOST_BITS_PER_WIDE_INT >= size
4198
                         && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4199
                  {
4200
                    /* This case is not handled correctly below.  */
4201
                    quotient = emit_store_flag (tquotient, EQ, op0, op1,
4202
                                                compute_mode, 1, 1);
4203
                    if (quotient == 0)
4204
                      goto fail1;
4205
                  }
4206
                else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4207
                         && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4208
                                      : sdiv_pow2_cheap[speed][compute_mode])
4209
                         /* We assume that cheap metric is true if the
4210
                            optab has an expander for this mode.  */
4211
                         && ((optab_handler ((rem_flag ? smod_optab
4212
                                              : sdiv_optab),
4213
                                              compute_mode)->insn_code
4214
                              != CODE_FOR_nothing)
4215
                             || (optab_handler(sdivmod_optab,
4216
                                               compute_mode)
4217
                                 ->insn_code != CODE_FOR_nothing)))
4218
                  ;
4219
                else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4220
                  {
4221
                    if (rem_flag)
4222
                      {
4223
                        remainder = expand_smod_pow2 (compute_mode, op0, d);
4224
                        if (remainder)
4225
                          return gen_lowpart (mode, remainder);
4226
                      }
4227
 
4228
                    if (sdiv_pow2_cheap[speed][compute_mode]
4229
                        && ((optab_handler (sdiv_optab, compute_mode)->insn_code
4230
                             != CODE_FOR_nothing)
4231
                            || (optab_handler (sdivmod_optab, compute_mode)->insn_code
4232
                                != CODE_FOR_nothing)))
4233
                      quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4234
                                                compute_mode, op0,
4235
                                                gen_int_mode (abs_d,
4236
                                                              compute_mode),
4237
                                                NULL_RTX, 0);
4238
                    else
4239
                      quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4240
 
4241
                    /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4242
                       negate the quotient.  */
4243
                    if (d < 0)
4244
                      {
4245
                        insn = get_last_insn ();
4246
                        if (insn != last
4247
                            && (set = single_set (insn)) != 0
4248
                            && SET_DEST (set) == quotient
4249
                            && abs_d < ((unsigned HOST_WIDE_INT) 1
4250
                                        << (HOST_BITS_PER_WIDE_INT - 1)))
4251
                          set_unique_reg_note (insn,
4252
                                               REG_EQUAL,
4253
                                               gen_rtx_DIV (compute_mode,
4254
                                                            op0,
4255
                                                            GEN_INT
4256
                                                            (trunc_int_for_mode
4257
                                                             (abs_d,
4258
                                                              compute_mode))));
4259
 
4260
                        quotient = expand_unop (compute_mode, neg_optab,
4261
                                                quotient, quotient, 0);
4262
                      }
4263
                  }
4264
                else if (size <= HOST_BITS_PER_WIDE_INT)
4265
                  {
4266
                    choose_multiplier (abs_d, size, size - 1,
4267
                                       &mlr, &post_shift, &lgup);
4268
                    ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4269
                    if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4270
                      {
4271
                        rtx t1, t2, t3;
4272
 
4273
                        if (post_shift >= BITS_PER_WORD
4274
                            || size - 1 >= BITS_PER_WORD)
4275
                          goto fail1;
4276
 
4277
                        extra_cost = (shift_cost[speed][compute_mode][post_shift]
4278
                                      + shift_cost[speed][compute_mode][size - 1]
4279
                                      + add_cost[speed][compute_mode]);
4280
                        t1 = expand_mult_highpart (compute_mode, op0, mlr,
4281
                                                   NULL_RTX, 0,
4282
                                                   max_cost - extra_cost);
4283
                        if (t1 == 0)
4284
                          goto fail1;
4285
                        t2 = expand_shift
4286
                          (RSHIFT_EXPR, compute_mode, t1,
4287
                           build_int_cst (NULL_TREE, post_shift),
4288
                           NULL_RTX, 0);
4289
                        t3 = expand_shift
4290
                          (RSHIFT_EXPR, compute_mode, op0,
4291
                           build_int_cst (NULL_TREE, size - 1),
4292
                           NULL_RTX, 0);
4293
                        if (d < 0)
4294
                          quotient
4295
                            = force_operand (gen_rtx_MINUS (compute_mode,
4296
                                                            t3, t2),
4297
                                             tquotient);
4298
                        else
4299
                          quotient
4300
                            = force_operand (gen_rtx_MINUS (compute_mode,
4301
                                                            t2, t3),
4302
                                             tquotient);
4303
                      }
4304
                    else
4305
                      {
4306
                        rtx t1, t2, t3, t4;
4307
 
4308
                        if (post_shift >= BITS_PER_WORD
4309
                            || size - 1 >= BITS_PER_WORD)
4310
                          goto fail1;
4311
 
4312
                        ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4313
                        mlr = gen_int_mode (ml, compute_mode);
4314
                        extra_cost = (shift_cost[speed][compute_mode][post_shift]
4315
                                      + shift_cost[speed][compute_mode][size - 1]
4316
                                      + 2 * add_cost[speed][compute_mode]);
4317
                        t1 = expand_mult_highpart (compute_mode, op0, mlr,
4318
                                                   NULL_RTX, 0,
4319
                                                   max_cost - extra_cost);
4320
                        if (t1 == 0)
4321
                          goto fail1;
4322
                        t2 = force_operand (gen_rtx_PLUS (compute_mode,
4323
                                                          t1, op0),
4324
                                            NULL_RTX);
4325
                        t3 = expand_shift
4326
                          (RSHIFT_EXPR, compute_mode, t2,
4327
                           build_int_cst (NULL_TREE, post_shift),
4328
                           NULL_RTX, 0);
4329
                        t4 = expand_shift
4330
                          (RSHIFT_EXPR, compute_mode, op0,
4331
                           build_int_cst (NULL_TREE, size - 1),
4332
                           NULL_RTX, 0);
4333
                        if (d < 0)
4334
                          quotient
4335
                            = force_operand (gen_rtx_MINUS (compute_mode,
4336
                                                            t4, t3),
4337
                                             tquotient);
4338
                        else
4339
                          quotient
4340
                            = force_operand (gen_rtx_MINUS (compute_mode,
4341
                                                            t3, t4),
4342
                                             tquotient);
4343
                      }
4344
                  }
4345
                else            /* Too wide mode to use tricky code */
4346
                  break;
4347
 
4348
                insn = get_last_insn ();
4349
                if (insn != last
4350
                    && (set = single_set (insn)) != 0
4351
                    && SET_DEST (set) == quotient)
4352
                  set_unique_reg_note (insn,
4353
                                       REG_EQUAL,
4354
                                       gen_rtx_DIV (compute_mode, op0, op1));
4355
              }
4356
            break;
4357
          }
4358
      fail1:
4359
        delete_insns_since (last);
4360
        break;
4361
 
4362
      case FLOOR_DIV_EXPR:
4363
      case FLOOR_MOD_EXPR:
4364
      /* We will come here only for signed operations.  */
4365
        if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4366
          {
4367
            unsigned HOST_WIDE_INT mh;
4368
            int pre_shift, lgup, post_shift;
4369
            HOST_WIDE_INT d = INTVAL (op1);
4370
            rtx ml;
4371
 
4372
            if (d > 0)
4373
              {
4374
                /* We could just as easily deal with negative constants here,
4375
                   but it does not seem worth the trouble for GCC 2.6.  */
4376
                if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4377
                  {
4378
                    pre_shift = floor_log2 (d);
4379
                    if (rem_flag)
4380
                      {
4381
                        remainder = expand_binop (compute_mode, and_optab, op0,
4382
                                                  GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4383
                                                  remainder, 0, OPTAB_LIB_WIDEN);
4384
                        if (remainder)
4385
                          return gen_lowpart (mode, remainder);
4386
                      }
4387
                    quotient = expand_shift
4388
                      (RSHIFT_EXPR, compute_mode, op0,
4389
                       build_int_cst (NULL_TREE, pre_shift),
4390
                       tquotient, 0);
4391
                  }
4392
                else
4393
                  {
4394
                    rtx t1, t2, t3, t4;
4395
 
4396
                    mh = choose_multiplier (d, size, size - 1,
4397
                                            &ml, &post_shift, &lgup);
4398
                    gcc_assert (!mh);
4399
 
4400
                    if (post_shift < BITS_PER_WORD
4401
                        && size - 1 < BITS_PER_WORD)
4402
                      {
4403
                        t1 = expand_shift
4404
                          (RSHIFT_EXPR, compute_mode, op0,
4405
                           build_int_cst (NULL_TREE, size - 1),
4406
                           NULL_RTX, 0);
4407
                        t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4408
                                           NULL_RTX, 0, OPTAB_WIDEN);
4409
                        extra_cost = (shift_cost[speed][compute_mode][post_shift]
4410
                                      + shift_cost[speed][compute_mode][size - 1]
4411
                                      + 2 * add_cost[speed][compute_mode]);
4412
                        t3 = expand_mult_highpart (compute_mode, t2, ml,
4413
                                                   NULL_RTX, 1,
4414
                                                   max_cost - extra_cost);
4415
                        if (t3 != 0)
4416
                          {
4417
                            t4 = expand_shift
4418
                              (RSHIFT_EXPR, compute_mode, t3,
4419
                               build_int_cst (NULL_TREE, post_shift),
4420
                               NULL_RTX, 1);
4421
                            quotient = expand_binop (compute_mode, xor_optab,
4422
                                                     t4, t1, tquotient, 0,
4423
                                                     OPTAB_WIDEN);
4424
                          }
4425
                      }
4426
                  }
4427
              }
4428
            else
4429
              {
4430
                rtx nsign, t1, t2, t3, t4;
4431
                t1 = force_operand (gen_rtx_PLUS (compute_mode,
4432
                                                  op0, constm1_rtx), NULL_RTX);
4433
                t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4434
                                   0, OPTAB_WIDEN);
4435
                nsign = expand_shift
4436
                  (RSHIFT_EXPR, compute_mode, t2,
4437
                   build_int_cst (NULL_TREE, size - 1),
4438
                   NULL_RTX, 0);
4439
                t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4440
                                    NULL_RTX);
4441
                t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4442
                                    NULL_RTX, 0);
4443
                if (t4)
4444
                  {
4445
                    rtx t5;
4446
                    t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4447
                                      NULL_RTX, 0);
4448
                    quotient = force_operand (gen_rtx_PLUS (compute_mode,
4449
                                                            t4, t5),
4450
                                              tquotient);
4451
                  }
4452
              }
4453
          }
4454
 
4455
        if (quotient != 0)
4456
          break;
4457
        delete_insns_since (last);
4458
 
4459
        /* Try using an instruction that produces both the quotient and
4460
           remainder, using truncation.  We can easily compensate the quotient
4461
           or remainder to get floor rounding, once we have the remainder.
4462
           Notice that we compute also the final remainder value here,
4463
           and return the result right away.  */
4464
        if (target == 0 || GET_MODE (target) != compute_mode)
4465
          target = gen_reg_rtx (compute_mode);
4466
 
4467
        if (rem_flag)
4468
          {
4469
            remainder
4470
              = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4471
            quotient = gen_reg_rtx (compute_mode);
4472
          }
4473
        else
4474
          {
4475
            quotient
4476
              = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4477
            remainder = gen_reg_rtx (compute_mode);
4478
          }
4479
 
4480
        if (expand_twoval_binop (sdivmod_optab, op0, op1,
4481
                                 quotient, remainder, 0))
4482
          {
4483
            /* This could be computed with a branch-less sequence.
4484
               Save that for later.  */
4485
            rtx tem;
4486
            rtx label = gen_label_rtx ();
4487
            do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4488
            tem = expand_binop (compute_mode, xor_optab, op0, op1,
4489
                                NULL_RTX, 0, OPTAB_WIDEN);
4490
            do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4491
            expand_dec (quotient, const1_rtx);
4492
            expand_inc (remainder, op1);
4493
            emit_label (label);
4494
            return gen_lowpart (mode, rem_flag ? remainder : quotient);
4495
          }
4496
 
4497
        /* No luck with division elimination or divmod.  Have to do it
4498
           by conditionally adjusting op0 *and* the result.  */
4499
        {
4500
          rtx label1, label2, label3, label4, label5;
4501
          rtx adjusted_op0;
4502
          rtx tem;
4503
 
4504
          quotient = gen_reg_rtx (compute_mode);
4505
          adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4506
          label1 = gen_label_rtx ();
4507
          label2 = gen_label_rtx ();
4508
          label3 = gen_label_rtx ();
4509
          label4 = gen_label_rtx ();
4510
          label5 = gen_label_rtx ();
4511
          do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4512
          do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4513
          tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4514
                              quotient, 0, OPTAB_LIB_WIDEN);
4515
          if (tem != quotient)
4516
            emit_move_insn (quotient, tem);
4517
          emit_jump_insn (gen_jump (label5));
4518
          emit_barrier ();
4519
          emit_label (label1);
4520
          expand_inc (adjusted_op0, const1_rtx);
4521
          emit_jump_insn (gen_jump (label4));
4522
          emit_barrier ();
4523
          emit_label (label2);
4524
          do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4525
          tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4526
                              quotient, 0, OPTAB_LIB_WIDEN);
4527
          if (tem != quotient)
4528
            emit_move_insn (quotient, tem);
4529
          emit_jump_insn (gen_jump (label5));
4530
          emit_barrier ();
4531
          emit_label (label3);
4532
          expand_dec (adjusted_op0, const1_rtx);
4533
          emit_label (label4);
4534
          tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4535
                              quotient, 0, OPTAB_LIB_WIDEN);
4536
          if (tem != quotient)
4537
            emit_move_insn (quotient, tem);
4538
          expand_dec (quotient, const1_rtx);
4539
          emit_label (label5);
4540
        }
4541
        break;
4542
 
4543
      case CEIL_DIV_EXPR:
4544
      case CEIL_MOD_EXPR:
4545
        if (unsignedp)
4546
          {
4547
            if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4548
              {
4549
                rtx t1, t2, t3;
4550
                unsigned HOST_WIDE_INT d = INTVAL (op1);
4551
                t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4552
                                   build_int_cst (NULL_TREE, floor_log2 (d)),
4553
                                   tquotient, 1);
4554
                t2 = expand_binop (compute_mode, and_optab, op0,
4555
                                   GEN_INT (d - 1),
4556
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4557
                t3 = gen_reg_rtx (compute_mode);
4558
                t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4559
                                      compute_mode, 1, 1);
4560
                if (t3 == 0)
4561
                  {
4562
                    rtx lab;
4563
                    lab = gen_label_rtx ();
4564
                    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4565
                    expand_inc (t1, const1_rtx);
4566
                    emit_label (lab);
4567
                    quotient = t1;
4568
                  }
4569
                else
4570
                  quotient = force_operand (gen_rtx_PLUS (compute_mode,
4571
                                                          t1, t3),
4572
                                            tquotient);
4573
                break;
4574
              }
4575
 
4576
            /* Try using an instruction that produces both the quotient and
4577
               remainder, using truncation.  We can easily compensate the
4578
               quotient or remainder to get ceiling rounding, once we have the
4579
               remainder.  Notice that we compute also the final remainder
4580
               value here, and return the result right away.  */
4581
            if (target == 0 || GET_MODE (target) != compute_mode)
4582
              target = gen_reg_rtx (compute_mode);
4583
 
4584
            if (rem_flag)
4585
              {
4586
                remainder = (REG_P (target)
4587
                             ? target : gen_reg_rtx (compute_mode));
4588
                quotient = gen_reg_rtx (compute_mode);
4589
              }
4590
            else
4591
              {
4592
                quotient = (REG_P (target)
4593
                            ? target : gen_reg_rtx (compute_mode));
4594
                remainder = gen_reg_rtx (compute_mode);
4595
              }
4596
 
4597
            if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4598
                                     remainder, 1))
4599
              {
4600
                /* This could be computed with a branch-less sequence.
4601
                   Save that for later.  */
4602
                rtx label = gen_label_rtx ();
4603
                do_cmp_and_jump (remainder, const0_rtx, EQ,
4604
                                 compute_mode, label);
4605
                expand_inc (quotient, const1_rtx);
4606
                expand_dec (remainder, op1);
4607
                emit_label (label);
4608
                return gen_lowpart (mode, rem_flag ? remainder : quotient);
4609
              }
4610
 
4611
            /* No luck with division elimination or divmod.  Have to do it
4612
               by conditionally adjusting op0 *and* the result.  */
4613
            {
4614
              rtx label1, label2;
4615
              rtx adjusted_op0, tem;
4616
 
4617
              quotient = gen_reg_rtx (compute_mode);
4618
              adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4619
              label1 = gen_label_rtx ();
4620
              label2 = gen_label_rtx ();
4621
              do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4622
                               compute_mode, label1);
4623
              emit_move_insn  (quotient, const0_rtx);
4624
              emit_jump_insn (gen_jump (label2));
4625
              emit_barrier ();
4626
              emit_label (label1);
4627
              expand_dec (adjusted_op0, const1_rtx);
4628
              tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4629
                                  quotient, 1, OPTAB_LIB_WIDEN);
4630
              if (tem != quotient)
4631
                emit_move_insn (quotient, tem);
4632
              expand_inc (quotient, const1_rtx);
4633
              emit_label (label2);
4634
            }
4635
          }
4636
        else /* signed */
4637
          {
4638
            if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4639
                && INTVAL (op1) >= 0)
4640
              {
4641
                /* This is extremely similar to the code for the unsigned case
4642
                   above.  For 2.7 we should merge these variants, but for
4643
                   2.6.1 I don't want to touch the code for unsigned since that
4644
                   get used in C.  The signed case will only be used by other
4645
                   languages (Ada).  */
4646
 
4647
                rtx t1, t2, t3;
4648
                unsigned HOST_WIDE_INT d = INTVAL (op1);
4649
                t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4650
                                   build_int_cst (NULL_TREE, floor_log2 (d)),
4651
                                   tquotient, 0);
4652
                t2 = expand_binop (compute_mode, and_optab, op0,
4653
                                   GEN_INT (d - 1),
4654
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4655
                t3 = gen_reg_rtx (compute_mode);
4656
                t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4657
                                      compute_mode, 1, 1);
4658
                if (t3 == 0)
4659
                  {
4660
                    rtx lab;
4661
                    lab = gen_label_rtx ();
4662
                    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4663
                    expand_inc (t1, const1_rtx);
4664
                    emit_label (lab);
4665
                    quotient = t1;
4666
                  }
4667
                else
4668
                  quotient = force_operand (gen_rtx_PLUS (compute_mode,
4669
                                                          t1, t3),
4670
                                            tquotient);
4671
                break;
4672
              }
4673
 
4674
            /* Try using an instruction that produces both the quotient and
4675
               remainder, using truncation.  We can easily compensate the
4676
               quotient or remainder to get ceiling rounding, once we have the
4677
               remainder.  Notice that we compute also the final remainder
4678
               value here, and return the result right away.  */
4679
            if (target == 0 || GET_MODE (target) != compute_mode)
4680
              target = gen_reg_rtx (compute_mode);
4681
            if (rem_flag)
4682
              {
4683
                remainder= (REG_P (target)
4684
                            ? target : gen_reg_rtx (compute_mode));
4685
                quotient = gen_reg_rtx (compute_mode);
4686
              }
4687
            else
4688
              {
4689
                quotient = (REG_P (target)
4690
                            ? target : gen_reg_rtx (compute_mode));
4691
                remainder = gen_reg_rtx (compute_mode);
4692
              }
4693
 
4694
            if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4695
                                     remainder, 0))
4696
              {
4697
                /* This could be computed with a branch-less sequence.
4698
                   Save that for later.  */
4699
                rtx tem;
4700
                rtx label = gen_label_rtx ();
4701
                do_cmp_and_jump (remainder, const0_rtx, EQ,
4702
                                 compute_mode, label);
4703
                tem = expand_binop (compute_mode, xor_optab, op0, op1,
4704
                                    NULL_RTX, 0, OPTAB_WIDEN);
4705
                do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4706
                expand_inc (quotient, const1_rtx);
4707
                expand_dec (remainder, op1);
4708
                emit_label (label);
4709
                return gen_lowpart (mode, rem_flag ? remainder : quotient);
4710
              }
4711
 
4712
            /* No luck with division elimination or divmod.  Have to do it
4713
               by conditionally adjusting op0 *and* the result.  */
4714
            {
4715
              rtx label1, label2, label3, label4, label5;
4716
              rtx adjusted_op0;
4717
              rtx tem;
4718
 
4719
              quotient = gen_reg_rtx (compute_mode);
4720
              adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4721
              label1 = gen_label_rtx ();
4722
              label2 = gen_label_rtx ();
4723
              label3 = gen_label_rtx ();
4724
              label4 = gen_label_rtx ();
4725
              label5 = gen_label_rtx ();
4726
              do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4727
              do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4728
                               compute_mode, label1);
4729
              tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4730
                                  quotient, 0, OPTAB_LIB_WIDEN);
4731
              if (tem != quotient)
4732
                emit_move_insn (quotient, tem);
4733
              emit_jump_insn (gen_jump (label5));
4734
              emit_barrier ();
4735
              emit_label (label1);
4736
              expand_dec (adjusted_op0, const1_rtx);
4737
              emit_jump_insn (gen_jump (label4));
4738
              emit_barrier ();
4739
              emit_label (label2);
4740
              do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4741
                               compute_mode, label3);
4742
              tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4743
                                  quotient, 0, OPTAB_LIB_WIDEN);
4744
              if (tem != quotient)
4745
                emit_move_insn (quotient, tem);
4746
              emit_jump_insn (gen_jump (label5));
4747
              emit_barrier ();
4748
              emit_label (label3);
4749
              expand_inc (adjusted_op0, const1_rtx);
4750
              emit_label (label4);
4751
              tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4752
                                  quotient, 0, OPTAB_LIB_WIDEN);
4753
              if (tem != quotient)
4754
                emit_move_insn (quotient, tem);
4755
              expand_inc (quotient, const1_rtx);
4756
              emit_label (label5);
4757
            }
4758
          }
4759
        break;
4760
 
4761
      case EXACT_DIV_EXPR:
4762
        if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4763
          {
4764
            HOST_WIDE_INT d = INTVAL (op1);
4765
            unsigned HOST_WIDE_INT ml;
4766
            int pre_shift;
4767
            rtx t1;
4768
 
4769
            pre_shift = floor_log2 (d & -d);
4770
            ml = invert_mod2n (d >> pre_shift, size);
4771
            t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4772
                               build_int_cst (NULL_TREE, pre_shift),
4773
                               NULL_RTX, unsignedp);
4774
            quotient = expand_mult (compute_mode, t1,
4775
                                    gen_int_mode (ml, compute_mode),
4776
                                    NULL_RTX, 1);
4777
 
4778
            insn = get_last_insn ();
4779
            set_unique_reg_note (insn,
4780
                                 REG_EQUAL,
4781
                                 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4782
                                                 compute_mode,
4783
                                                 op0, op1));
4784
          }
4785
        break;
4786
 
4787
      case ROUND_DIV_EXPR:
4788
      case ROUND_MOD_EXPR:
4789
        if (unsignedp)
4790
          {
4791
            rtx tem;
4792
            rtx label;
4793
            label = gen_label_rtx ();
4794
            quotient = gen_reg_rtx (compute_mode);
4795
            remainder = gen_reg_rtx (compute_mode);
4796
            if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4797
              {
4798
                rtx tem;
4799
                quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4800
                                         quotient, 1, OPTAB_LIB_WIDEN);
4801
                tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4802
                remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4803
                                          remainder, 1, OPTAB_LIB_WIDEN);
4804
              }
4805
            tem = plus_constant (op1, -1);
4806
            tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4807
                                build_int_cst (NULL_TREE, 1),
4808
                                NULL_RTX, 1);
4809
            do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4810
            expand_inc (quotient, const1_rtx);
4811
            expand_dec (remainder, op1);
4812
            emit_label (label);
4813
          }
4814
        else
4815
          {
4816
            rtx abs_rem, abs_op1, tem, mask;
4817
            rtx label;
4818
            label = gen_label_rtx ();
4819
            quotient = gen_reg_rtx (compute_mode);
4820
            remainder = gen_reg_rtx (compute_mode);
4821
            if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4822
              {
4823
                rtx tem;
4824
                quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4825
                                         quotient, 0, OPTAB_LIB_WIDEN);
4826
                tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4827
                remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4828
                                          remainder, 0, OPTAB_LIB_WIDEN);
4829
              }
4830
            abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4831
            abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4832
            tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4833
                                build_int_cst (NULL_TREE, 1),
4834
                                NULL_RTX, 1);
4835
            do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4836
            tem = expand_binop (compute_mode, xor_optab, op0, op1,
4837
                                NULL_RTX, 0, OPTAB_WIDEN);
4838
            mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4839
                                 build_int_cst (NULL_TREE, size - 1),
4840
                                 NULL_RTX, 0);
4841
            tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4842
                                NULL_RTX, 0, OPTAB_WIDEN);
4843
            tem = expand_binop (compute_mode, sub_optab, tem, mask,
4844
                                NULL_RTX, 0, OPTAB_WIDEN);
4845
            expand_inc (quotient, tem);
4846
            tem = expand_binop (compute_mode, xor_optab, mask, op1,
4847
                                NULL_RTX, 0, OPTAB_WIDEN);
4848
            tem = expand_binop (compute_mode, sub_optab, tem, mask,
4849
                                NULL_RTX, 0, OPTAB_WIDEN);
4850
            expand_dec (remainder, tem);
4851
            emit_label (label);
4852
          }
4853
        return gen_lowpart (mode, rem_flag ? remainder : quotient);
4854
 
4855
      default:
4856
        gcc_unreachable ();
4857
      }
4858
 
4859
  if (quotient == 0)
4860
    {
4861
      if (target && GET_MODE (target) != compute_mode)
4862
        target = 0;
4863
 
4864
      if (rem_flag)
4865
        {
4866
          /* Try to produce the remainder without producing the quotient.
4867
             If we seem to have a divmod pattern that does not require widening,
4868
             don't try widening here.  We should really have a WIDEN argument
4869
             to expand_twoval_binop, since what we'd really like to do here is
4870
             1) try a mod insn in compute_mode
4871
             2) try a divmod insn in compute_mode
4872
             3) try a div insn in compute_mode and multiply-subtract to get
4873
                remainder
4874
             4) try the same things with widening allowed.  */
4875
          remainder
4876
            = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4877
                                 op0, op1, target,
4878
                                 unsignedp,
4879
                                 ((optab_handler (optab2, compute_mode)->insn_code
4880
                                   != CODE_FOR_nothing)
4881
                                  ? OPTAB_DIRECT : OPTAB_WIDEN));
4882
          if (remainder == 0)
4883
            {
4884
              /* No luck there.  Can we do remainder and divide at once
4885
                 without a library call?  */
4886
              remainder = gen_reg_rtx (compute_mode);
4887
              if (! expand_twoval_binop ((unsignedp
4888
                                          ? udivmod_optab
4889
                                          : sdivmod_optab),
4890
                                         op0, op1,
4891
                                         NULL_RTX, remainder, unsignedp))
4892
                remainder = 0;
4893
            }
4894
 
4895
          if (remainder)
4896
            return gen_lowpart (mode, remainder);
4897
        }
4898
 
4899
      /* Produce the quotient.  Try a quotient insn, but not a library call.
4900
         If we have a divmod in this mode, use it in preference to widening
4901
         the div (for this test we assume it will not fail). Note that optab2
4902
         is set to the one of the two optabs that the call below will use.  */
4903
      quotient
4904
        = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4905
                             op0, op1, rem_flag ? NULL_RTX : target,
4906
                             unsignedp,
4907
                             ((optab_handler (optab2, compute_mode)->insn_code
4908
                               != CODE_FOR_nothing)
4909
                              ? OPTAB_DIRECT : OPTAB_WIDEN));
4910
 
4911
      if (quotient == 0)
4912
        {
4913
          /* No luck there.  Try a quotient-and-remainder insn,
4914
             keeping the quotient alone.  */
4915
          quotient = gen_reg_rtx (compute_mode);
4916
          if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4917
                                     op0, op1,
4918
                                     quotient, NULL_RTX, unsignedp))
4919
            {
4920
              quotient = 0;
4921
              if (! rem_flag)
4922
                /* Still no luck.  If we are not computing the remainder,
4923
                   use a library call for the quotient.  */
4924
                quotient = sign_expand_binop (compute_mode,
4925
                                              udiv_optab, sdiv_optab,
4926
                                              op0, op1, target,
4927
                                              unsignedp, OPTAB_LIB_WIDEN);
4928
            }
4929
        }
4930
    }
4931
 
4932
  if (rem_flag)
4933
    {
4934
      if (target && GET_MODE (target) != compute_mode)
4935
        target = 0;
4936
 
4937
      if (quotient == 0)
4938
        {
4939
          /* No divide instruction either.  Use library for remainder.  */
4940
          remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4941
                                         op0, op1, target,
4942
                                         unsignedp, OPTAB_LIB_WIDEN);
4943
          /* No remainder function.  Try a quotient-and-remainder
4944
             function, keeping the remainder.  */
4945
          if (!remainder)
4946
            {
4947
              remainder = gen_reg_rtx (compute_mode);
4948
              if (!expand_twoval_binop_libfunc
4949
                  (unsignedp ? udivmod_optab : sdivmod_optab,
4950
                   op0, op1,
4951
                   NULL_RTX, remainder,
4952
                   unsignedp ? UMOD : MOD))
4953
                remainder = NULL_RTX;
4954
            }
4955
        }
4956
      else
4957
        {
4958
          /* We divided.  Now finish doing X - Y * (X / Y).  */
4959
          remainder = expand_mult (compute_mode, quotient, op1,
4960
                                   NULL_RTX, unsignedp);
4961
          remainder = expand_binop (compute_mode, sub_optab, op0,
4962
                                    remainder, target, unsignedp,
4963
                                    OPTAB_LIB_WIDEN);
4964
        }
4965
    }
4966
 
4967
  return gen_lowpart (mode, rem_flag ? remainder : quotient);
4968
}
4969
 
4970
/* Return a tree node with data type TYPE, describing the value of X.
4971
   Usually this is an VAR_DECL, if there is no obvious better choice.
4972
   X may be an expression, however we only support those expressions
4973
   generated by loop.c.  */
4974
 
4975
tree
4976
make_tree (tree type, rtx x)
4977
{
4978
  tree t;
4979
 
4980
  switch (GET_CODE (x))
4981
    {
4982
    case CONST_INT:
4983
      {
4984
        HOST_WIDE_INT hi = 0;
4985
 
4986
        if (INTVAL (x) < 0
4987
            && !(TYPE_UNSIGNED (type)
4988
                 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4989
                     < HOST_BITS_PER_WIDE_INT)))
4990
          hi = -1;
4991
 
4992
        t = build_int_cst_wide (type, INTVAL (x), hi);
4993
 
4994
        return t;
4995
      }
4996
 
4997
    case CONST_DOUBLE:
4998
      if (GET_MODE (x) == VOIDmode)
4999
        t = build_int_cst_wide (type,
5000
                                CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
5001
      else
5002
        {
5003
          REAL_VALUE_TYPE d;
5004
 
5005
          REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5006
          t = build_real (type, d);
5007
        }
5008
 
5009
      return t;
5010
 
5011
    case CONST_VECTOR:
5012
      {
5013
        int units = CONST_VECTOR_NUNITS (x);
5014
        tree itype = TREE_TYPE (type);
5015
        tree t = NULL_TREE;
5016
        int i;
5017
 
5018
 
5019
        /* Build a tree with vector elements.  */
5020
        for (i = units - 1; i >= 0; --i)
5021
          {
5022
            rtx elt = CONST_VECTOR_ELT (x, i);
5023
            t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
5024
          }
5025
 
5026
        return build_vector (type, t);
5027
      }
5028
 
5029
    case PLUS:
5030
      return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5031
                          make_tree (type, XEXP (x, 1)));
5032
 
5033
    case MINUS:
5034
      return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5035
                          make_tree (type, XEXP (x, 1)));
5036
 
5037
    case NEG:
5038
      return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5039
 
5040
    case MULT:
5041
      return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5042
                          make_tree (type, XEXP (x, 1)));
5043
 
5044
    case ASHIFT:
5045
      return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5046
                          make_tree (type, XEXP (x, 1)));
5047
 
5048
    case LSHIFTRT:
5049
      t = unsigned_type_for (type);
5050
      return fold_convert (type, build2 (RSHIFT_EXPR, t,
5051
                                         make_tree (t, XEXP (x, 0)),
5052
                                         make_tree (type, XEXP (x, 1))));
5053
 
5054
    case ASHIFTRT:
5055
      t = signed_type_for (type);
5056
      return fold_convert (type, build2 (RSHIFT_EXPR, t,
5057
                                         make_tree (t, XEXP (x, 0)),
5058
                                         make_tree (type, XEXP (x, 1))));
5059
 
5060
    case DIV:
5061
      if (TREE_CODE (type) != REAL_TYPE)
5062
        t = signed_type_for (type);
5063
      else
5064
        t = type;
5065
 
5066
      return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5067
                                         make_tree (t, XEXP (x, 0)),
5068
                                         make_tree (t, XEXP (x, 1))));
5069
    case UDIV:
5070
      t = unsigned_type_for (type);
5071
      return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5072
                                         make_tree (t, XEXP (x, 0)),
5073
                                         make_tree (t, XEXP (x, 1))));
5074
 
5075
    case SIGN_EXTEND:
5076
    case ZERO_EXTEND:
5077
      t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5078
                                          GET_CODE (x) == ZERO_EXTEND);
5079
      return fold_convert (type, make_tree (t, XEXP (x, 0)));
5080
 
5081
    case CONST:
5082
      return make_tree (type, XEXP (x, 0));
5083
 
5084
    case SYMBOL_REF:
5085
      t = SYMBOL_REF_DECL (x);
5086
      if (t)
5087
        return fold_convert (type, build_fold_addr_expr (t));
5088
      /* else fall through.  */
5089
 
5090
    default:
5091
      t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5092
 
5093
      /* If TYPE is a POINTER_TYPE, we might need to convert X from
5094
         address mode to pointer mode.  */
5095
      if (POINTER_TYPE_P (type))
5096
        x = convert_memory_address_addr_space
5097
              (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5098
 
5099
      /* Note that we do *not* use SET_DECL_RTL here, because we do not
5100
         want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5101
      t->decl_with_rtl.rtl = x;
5102
 
5103
      return t;
5104
    }
5105
}
5106
 
5107
/* Compute the logical-and of OP0 and OP1, storing it in TARGET
5108
   and returning TARGET.
5109
 
5110
   If TARGET is 0, a pseudo-register or constant is returned.  */
5111
 
5112
rtx
5113
expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5114
{
5115
  rtx tem = 0;
5116
 
5117
  if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5118
    tem = simplify_binary_operation (AND, mode, op0, op1);
5119
  if (tem == 0)
5120
    tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5121
 
5122
  if (target == 0)
5123
    target = tem;
5124
  else if (tem != target)
5125
    emit_move_insn (target, tem);
5126
  return target;
5127
}
5128
 
5129
/* Helper function for emit_store_flag.  */
5130
static rtx
5131
emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5132
             enum machine_mode mode, enum machine_mode compare_mode,
5133
             int unsignedp, rtx x, rtx y, int normalizep,
5134
             enum machine_mode target_mode)
5135
{
5136
  rtx op0, last, comparison, subtarget, pattern;
5137
  enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5138
 
5139
  last = get_last_insn ();
5140
  x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5141
  y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5142
  comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5143
  if (!x || !y
5144
      || !insn_data[icode].operand[2].predicate
5145
          (x, insn_data[icode].operand[2].mode)
5146
      || !insn_data[icode].operand[3].predicate
5147
          (y, insn_data[icode].operand[3].mode)
5148
      || !insn_data[icode].operand[1].predicate (comparison, VOIDmode))
5149
    {
5150
      delete_insns_since (last);
5151
      return NULL_RTX;
5152
    }
5153
 
5154
  if (target_mode == VOIDmode)
5155
    target_mode = result_mode;
5156
  if (!target)
5157
    target = gen_reg_rtx (target_mode);
5158
 
5159
  if (optimize
5160
      || !(insn_data[(int) icode].operand[0].predicate (target, result_mode)))
5161
    subtarget = gen_reg_rtx (result_mode);
5162
  else
5163
    subtarget = target;
5164
 
5165
  pattern = GEN_FCN (icode) (subtarget, comparison, x, y);
5166
  if (!pattern)
5167
    return NULL_RTX;
5168
  emit_insn (pattern);
5169
 
5170
  /* If we are converting to a wider mode, first convert to
5171
     TARGET_MODE, then normalize.  This produces better combining
5172
     opportunities on machines that have a SIGN_EXTRACT when we are
5173
     testing a single bit.  This mostly benefits the 68k.
5174
 
5175
     If STORE_FLAG_VALUE does not have the sign bit set when
5176
     interpreted in MODE, we can do this conversion as unsigned, which
5177
     is usually more efficient.  */
5178
  if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5179
    {
5180
      convert_move (target, subtarget,
5181
                    (GET_MODE_BITSIZE (result_mode) <= HOST_BITS_PER_WIDE_INT)
5182
                    && 0 == (STORE_FLAG_VALUE
5183
                             & ((HOST_WIDE_INT) 1
5184
                                << (GET_MODE_BITSIZE (result_mode) -1))));
5185
      op0 = target;
5186
      result_mode = target_mode;
5187
    }
5188
  else
5189
    op0 = subtarget;
5190
 
5191
  /* If we want to keep subexpressions around, don't reuse our last
5192
     target.  */
5193
  if (optimize)
5194
    subtarget = 0;
5195
 
5196
  /* Now normalize to the proper value in MODE.  Sometimes we don't
5197
     have to do anything.  */
5198
  if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5199
    ;
5200
  /* STORE_FLAG_VALUE might be the most negative number, so write
5201
     the comparison this way to avoid a compiler-time warning.  */
5202
  else if (- normalizep == STORE_FLAG_VALUE)
5203
    op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5204
 
5205
  /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5206
     it hard to use a value of just the sign bit due to ANSI integer
5207
     constant typing rules.  */
5208
  else if (GET_MODE_BITSIZE (result_mode) <= HOST_BITS_PER_WIDE_INT
5209
           && (STORE_FLAG_VALUE
5210
               & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (result_mode) - 1))))
5211
    op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5212
                        size_int (GET_MODE_BITSIZE (result_mode) - 1), subtarget,
5213
                        normalizep == 1);
5214
  else
5215
    {
5216
      gcc_assert (STORE_FLAG_VALUE & 1);
5217
 
5218
      op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5219
      if (normalizep == -1)
5220
        op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5221
    }
5222
 
5223
  /* If we were converting to a smaller mode, do the conversion now.  */
5224
  if (target_mode != result_mode)
5225
    {
5226
      convert_move (target, op0, 0);
5227
      return target;
5228
    }
5229
  else
5230
    return op0;
5231
}
5232
 
5233
 
5234
/* A subroutine of emit_store_flag only including "tricks" that do not
5235
   need a recursive call.  These are kept separate to avoid infinite
5236
   loops.  */
5237
 
5238
static rtx
5239
emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5240
                   enum machine_mode mode, int unsignedp, int normalizep,
5241
                   enum machine_mode target_mode)
5242
{
5243
  rtx subtarget;
5244
  enum insn_code icode;
5245
  enum machine_mode compare_mode;
5246
  enum mode_class mclass;
5247
  enum rtx_code scode;
5248
  rtx tem;
5249
 
5250
  if (unsignedp)
5251
    code = unsigned_condition (code);
5252
  scode = swap_condition (code);
5253
 
5254
  /* If one operand is constant, make it the second one.  Only do this
5255
     if the other operand is not constant as well.  */
5256
 
5257
  if (swap_commutative_operands_p (op0, op1))
5258
    {
5259
      tem = op0;
5260
      op0 = op1;
5261
      op1 = tem;
5262
      code = swap_condition (code);
5263
    }
5264
 
5265
  if (mode == VOIDmode)
5266
    mode = GET_MODE (op0);
5267
 
5268
  /* For some comparisons with 1 and -1, we can convert this to
5269
     comparisons with zero.  This will often produce more opportunities for
5270
     store-flag insns.  */
5271
 
5272
  switch (code)
5273
    {
5274
    case LT:
5275
      if (op1 == const1_rtx)
5276
        op1 = const0_rtx, code = LE;
5277
      break;
5278
    case LE:
5279
      if (op1 == constm1_rtx)
5280
        op1 = const0_rtx, code = LT;
5281
      break;
5282
    case GE:
5283
      if (op1 == const1_rtx)
5284
        op1 = const0_rtx, code = GT;
5285
      break;
5286
    case GT:
5287
      if (op1 == constm1_rtx)
5288
        op1 = const0_rtx, code = GE;
5289
      break;
5290
    case GEU:
5291
      if (op1 == const1_rtx)
5292
        op1 = const0_rtx, code = NE;
5293
      break;
5294
    case LTU:
5295
      if (op1 == const1_rtx)
5296
        op1 = const0_rtx, code = EQ;
5297
      break;
5298
    default:
5299
      break;
5300
    }
5301
 
5302
  /* If we are comparing a double-word integer with zero or -1, we can
5303
     convert the comparison into one involving a single word.  */
5304
  if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5305
      && GET_MODE_CLASS (mode) == MODE_INT
5306
      && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5307
    {
5308
      if ((code == EQ || code == NE)
5309
          && (op1 == const0_rtx || op1 == constm1_rtx))
5310
        {
5311
          rtx op00, op01;
5312
 
5313
          /* Do a logical OR or AND of the two words and compare the
5314
             result.  */
5315
          op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5316
          op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5317
          tem = expand_binop (word_mode,
5318
                              op1 == const0_rtx ? ior_optab : and_optab,
5319
                              op00, op01, NULL_RTX, unsignedp,
5320
                              OPTAB_DIRECT);
5321
 
5322
          if (tem != 0)
5323
            tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5324
                                   unsignedp, normalizep);
5325
        }
5326
      else if ((code == LT || code == GE) && op1 == const0_rtx)
5327
        {
5328
          rtx op0h;
5329
 
5330
          /* If testing the sign bit, can just test on high word.  */
5331
          op0h = simplify_gen_subreg (word_mode, op0, mode,
5332
                                      subreg_highpart_offset (word_mode,
5333
                                                              mode));
5334
          tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5335
                                 unsignedp, normalizep);
5336
        }
5337
      else
5338
        tem = NULL_RTX;
5339
 
5340
      if (tem)
5341
        {
5342
          if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5343
            return tem;
5344
          if (!target)
5345
            target = gen_reg_rtx (target_mode);
5346
 
5347
          convert_move (target, tem,
5348
 
5349
                              & ((HOST_WIDE_INT) 1
5350
                                 << (GET_MODE_BITSIZE (word_mode) -1))));
5351
          return target;
5352
        }
5353
    }
5354
 
5355
  /* If this is A < 0 or A >= 0, we can do this by taking the ones
5356
     complement of A (for GE) and shifting the sign bit to the low bit.  */
5357
  if (op1 == const0_rtx && (code == LT || code == GE)
5358
      && GET_MODE_CLASS (mode) == MODE_INT
5359
      && (normalizep || STORE_FLAG_VALUE == 1
5360
          || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5361
              && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5362
                  == ((unsigned HOST_WIDE_INT) 1
5363
                      << (GET_MODE_BITSIZE (mode) - 1))))))
5364
    {
5365
      subtarget = target;
5366
 
5367
      if (!target)
5368
        target_mode = mode;
5369
 
5370
      /* If the result is to be wider than OP0, it is best to convert it
5371
         first.  If it is to be narrower, it is *incorrect* to convert it
5372
         first.  */
5373
      else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5374
        {
5375
          op0 = convert_modes (target_mode, mode, op0, 0);
5376
          mode = target_mode;
5377
        }
5378
 
5379
      if (target_mode != mode)
5380
        subtarget = 0;
5381
 
5382
      if (code == GE)
5383
        op0 = expand_unop (mode, one_cmpl_optab, op0,
5384
                           ((STORE_FLAG_VALUE == 1 || normalizep)
5385
                            ? 0 : subtarget), 0);
5386
 
5387
      if (STORE_FLAG_VALUE == 1 || normalizep)
5388
        /* If we are supposed to produce a 0/1 value, we want to do
5389
           a logical shift from the sign bit to the low-order bit; for
5390
           a -1/0 value, we do an arithmetic shift.  */
5391
        op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5392
                            size_int (GET_MODE_BITSIZE (mode) - 1),
5393
                            subtarget, normalizep != -1);
5394
 
5395
      if (mode != target_mode)
5396
        op0 = convert_modes (target_mode, mode, op0, 0);
5397
 
5398
      return op0;
5399
    }
5400
 
5401
  mclass = GET_MODE_CLASS (mode);
5402
  for (compare_mode = mode; compare_mode != VOIDmode;
5403
       compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5404
    {
5405
     enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5406
     icode = optab_handler (cstore_optab, optab_mode)->insn_code;
5407
     if (icode != CODE_FOR_nothing)
5408
        {
5409
          do_pending_stack_adjust ();
5410
          tem = emit_cstore (target, icode, code, mode, compare_mode,
5411
                             unsignedp, op0, op1, normalizep, target_mode);
5412
          if (tem)
5413
            return tem;
5414
 
5415
          if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5416
            {
5417
              tem = emit_cstore (target, icode, scode, mode, compare_mode,
5418
                                 unsignedp, op1, op0, normalizep, target_mode);
5419
              if (tem)
5420
                return tem;
5421
            }
5422
          break;
5423
        }
5424
    }
5425
 
5426
  return 0;
5427
}
5428
 
5429
/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5430
   and storing in TARGET.  Normally return TARGET.
5431
   Return 0 if that cannot be done.
5432
 
5433
   MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5434
   it is VOIDmode, they cannot both be CONST_INT.
5435
 
5436
   UNSIGNEDP is for the case where we have to widen the operands
5437
   to perform the operation.  It says to use zero-extension.
5438
 
5439
   NORMALIZEP is 1 if we should convert the result to be either zero
5440
   or one.  Normalize is -1 if we should convert the result to be
5441
   either zero or -1.  If NORMALIZEP is zero, the result will be left
5442
   "raw" out of the scc insn.  */
5443
 
5444
rtx
5445
emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5446
                 enum machine_mode mode, int unsignedp, int normalizep)
5447
{
5448
  enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5449
  enum rtx_code rcode;
5450
  rtx subtarget;
5451
  rtx tem, last, trueval;
5452
 
5453
  tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5454
                           target_mode);
5455
  if (tem)
5456
    return tem;
5457
 
5458
  /* If we reached here, we can't do this with a scc insn, however there
5459
     are some comparisons that can be done in other ways.  Don't do any
5460
     of these cases if branches are very cheap.  */
5461
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5462
    return 0;
5463
 
5464
  /* See what we need to return.  We can only return a 1, -1, or the
5465
     sign bit.  */
5466
 
5467
  if (normalizep == 0)
5468
    {
5469
      if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5470
        normalizep = STORE_FLAG_VALUE;
5471
 
5472
      else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5473
               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5474
                   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5475
        ;
5476
      else
5477
        return 0;
5478
    }
5479
 
5480
  last = get_last_insn ();
5481
 
5482
  /* If optimizing, use different pseudo registers for each insn, instead
5483
     of reusing the same pseudo.  This leads to better CSE, but slows
5484
     down the compiler, since there are more pseudos */
5485
  subtarget = (!optimize
5486
               && (target_mode == mode)) ? target : NULL_RTX;
5487
  trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5488
 
5489
  /* For floating-point comparisons, try the reverse comparison or try
5490
     changing the "orderedness" of the comparison.  */
5491
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5492
    {
5493
      enum rtx_code first_code;
5494
      bool and_them;
5495
 
5496
      rcode = reverse_condition_maybe_unordered (code);
5497
      if (can_compare_p (rcode, mode, ccp_store_flag)
5498
          && (code == ORDERED || code == UNORDERED
5499
              || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5500
              || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5501
        {
5502
          int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5503
                          || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5504
 
5505
          /* For the reverse comparison, use either an addition or a XOR.  */
5506
          if (want_add
5507
              && rtx_cost (GEN_INT (normalizep), PLUS,
5508
                           optimize_insn_for_speed_p ()) == 0)
5509
            {
5510
              tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5511
                                       STORE_FLAG_VALUE, target_mode);
5512
              if (tem)
5513
                return expand_binop (target_mode, add_optab, tem,
5514
                                     GEN_INT (normalizep),
5515
                                     target, 0, OPTAB_WIDEN);
5516
            }
5517
          else if (!want_add
5518
                   && rtx_cost (trueval, XOR,
5519
                                optimize_insn_for_speed_p ()) == 0)
5520
            {
5521
              tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5522
                                       normalizep, target_mode);
5523
              if (tem)
5524
                return expand_binop (target_mode, xor_optab, tem, trueval,
5525
                                     target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5526
            }
5527
        }
5528
 
5529
      delete_insns_since (last);
5530
 
5531
      /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5532
      if (code == ORDERED || code == UNORDERED)
5533
        return 0;
5534
 
5535
      and_them = split_comparison (code, mode, &first_code, &code);
5536
 
5537
      /* If there are no NaNs, the first comparison should always fall through.
5538
         Effectively change the comparison to the other one.  */
5539
      if (!HONOR_NANS (mode))
5540
        {
5541
          gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5542
          return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5543
                                    target_mode);
5544
        }
5545
 
5546
#ifdef HAVE_conditional_move
5547
      /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5548
         conditional move.  */
5549
      tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5550
                               normalizep, target_mode);
5551
      if (tem == 0)
5552
        return 0;
5553
 
5554
      if (and_them)
5555
        tem = emit_conditional_move (target, code, op0, op1, mode,
5556
                                     tem, const0_rtx, GET_MODE (tem), 0);
5557
      else
5558
        tem = emit_conditional_move (target, code, op0, op1, mode,
5559
                                     trueval, tem, GET_MODE (tem), 0);
5560
 
5561
      if (tem == 0)
5562
        delete_insns_since (last);
5563
      return tem;
5564
#else
5565
      return 0;
5566
#endif
5567
    }
5568
 
5569
  /* The remaining tricks only apply to integer comparisons.  */
5570
 
5571
  if (GET_MODE_CLASS (mode) != MODE_INT)
5572
    return 0;
5573
 
5574
  /* If this is an equality comparison of integers, we can try to exclusive-or
5575
     (or subtract) the two operands and use a recursive call to try the
5576
     comparison with zero.  Don't do any of these cases if branches are
5577
     very cheap.  */
5578
 
5579
  if ((code == EQ || code == NE) && op1 != const0_rtx)
5580
    {
5581
      tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5582
                          OPTAB_WIDEN);
5583
 
5584
      if (tem == 0)
5585
        tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5586
                            OPTAB_WIDEN);
5587
      if (tem != 0)
5588
        tem = emit_store_flag (target, code, tem, const0_rtx,
5589
                               mode, unsignedp, normalizep);
5590
      if (tem != 0)
5591
        return tem;
5592
 
5593
      delete_insns_since (last);
5594
    }
5595
 
5596
  /* For integer comparisons, try the reverse comparison.  However, for
5597
     small X and if we'd have anyway to extend, implementing "X != 0"
5598
     as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5599
  rcode = reverse_condition (code);
5600
  if (can_compare_p (rcode, mode, ccp_store_flag)
5601
      && ! (optab_handler (cstore_optab, mode)->insn_code == CODE_FOR_nothing
5602
            && code == NE
5603
            && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5604
            && op1 == const0_rtx))
5605
    {
5606
      int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5607
                      || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5608
 
5609
      /* Again, for the reverse comparison, use either an addition or a XOR.  */
5610
      if (want_add
5611
          && rtx_cost (GEN_INT (normalizep), PLUS,
5612
                       optimize_insn_for_speed_p ()) == 0)
5613
        {
5614
          tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5615
                                   STORE_FLAG_VALUE, target_mode);
5616
          if (tem != 0)
5617
            tem = expand_binop (target_mode, add_optab, tem,
5618
                                GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5619
        }
5620
      else if (!want_add
5621
               && rtx_cost (trueval, XOR,
5622
                            optimize_insn_for_speed_p ()) == 0)
5623
        {
5624
          tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5625
                                   normalizep, target_mode);
5626
          if (tem != 0)
5627
            tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5628
                                INTVAL (trueval) >= 0, OPTAB_WIDEN);
5629
        }
5630
 
5631
      if (tem != 0)
5632
        return tem;
5633
      delete_insns_since (last);
5634
    }
5635
 
5636
  /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5637
     the constant zero.  Reject all other comparisons at this point.  Only
5638
     do LE and GT if branches are expensive since they are expensive on
5639
     2-operand machines.  */
5640
 
5641
  if (op1 != const0_rtx
5642
      || (code != EQ && code != NE
5643
          && (BRANCH_COST (optimize_insn_for_speed_p (),
5644
                           false) <= 1 || (code != LE && code != GT))))
5645
    return 0;
5646
 
5647
  /* Try to put the result of the comparison in the sign bit.  Assume we can't
5648
     do the necessary operation below.  */
5649
 
5650
  tem = 0;
5651
 
5652
  /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5653
     the sign bit set.  */
5654
 
5655
  if (code == LE)
5656
    {
5657
      /* This is destructive, so SUBTARGET can't be OP0.  */
5658
      if (rtx_equal_p (subtarget, op0))
5659
        subtarget = 0;
5660
 
5661
      tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5662
                          OPTAB_WIDEN);
5663
      if (tem)
5664
        tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5665
                            OPTAB_WIDEN);
5666
    }
5667
 
5668
  /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5669
     number of bits in the mode of OP0, minus one.  */
5670
 
5671
  if (code == GT)
5672
    {
5673
      if (rtx_equal_p (subtarget, op0))
5674
        subtarget = 0;
5675
 
5676
      tem = expand_shift (RSHIFT_EXPR, mode, op0,
5677
                          size_int (GET_MODE_BITSIZE (mode) - 1),
5678
                          subtarget, 0);
5679
      tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5680
                          OPTAB_WIDEN);
5681
    }
5682
 
5683
  if (code == EQ || code == NE)
5684
    {
5685
      /* For EQ or NE, one way to do the comparison is to apply an operation
5686
         that converts the operand into a positive number if it is nonzero
5687
         or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5688
         for NE we negate.  This puts the result in the sign bit.  Then we
5689
         normalize with a shift, if needed.
5690
 
5691
         Two operations that can do the above actions are ABS and FFS, so try
5692
         them.  If that doesn't work, and MODE is smaller than a full word,
5693
         we can use zero-extension to the wider mode (an unsigned conversion)
5694
         as the operation.  */
5695
 
5696
      /* Note that ABS doesn't yield a positive number for INT_MIN, but
5697
         that is compensated by the subsequent overflow when subtracting
5698
         one / negating.  */
5699
 
5700
      if (optab_handler (abs_optab, mode)->insn_code != CODE_FOR_nothing)
5701
        tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5702
      else if (optab_handler (ffs_optab, mode)->insn_code != CODE_FOR_nothing)
5703
        tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5704
      else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5705
        {
5706
          tem = convert_modes (word_mode, mode, op0, 1);
5707
          mode = word_mode;
5708
        }
5709
 
5710
      if (tem != 0)
5711
        {
5712
          if (code == EQ)
5713
            tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5714
                                0, OPTAB_WIDEN);
5715
          else
5716
            tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5717
        }
5718
 
5719
      /* If we couldn't do it that way, for NE we can "or" the two's complement
5720
         of the value with itself.  For EQ, we take the one's complement of
5721
         that "or", which is an extra insn, so we only handle EQ if branches
5722
         are expensive.  */
5723
 
5724
      if (tem == 0
5725
          && (code == NE
5726
              || BRANCH_COST (optimize_insn_for_speed_p (),
5727
                              false) > 1))
5728
        {
5729
          if (rtx_equal_p (subtarget, op0))
5730
            subtarget = 0;
5731
 
5732
          tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5733
          tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5734
                              OPTAB_WIDEN);
5735
 
5736
          if (tem && code == EQ)
5737
            tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5738
        }
5739
    }
5740
 
5741
  if (tem && normalizep)
5742
    tem = expand_shift (RSHIFT_EXPR, mode, tem,
5743
                        size_int (GET_MODE_BITSIZE (mode) - 1),
5744
                        subtarget, normalizep == 1);
5745
 
5746
  if (tem)
5747
    {
5748
      if (!target)
5749
        ;
5750
      else if (GET_MODE (tem) != target_mode)
5751
        {
5752
          convert_move (target, tem, 0);
5753
          tem = target;
5754
        }
5755
      else if (!subtarget)
5756
        {
5757
          emit_move_insn (target, tem);
5758
          tem = target;
5759
        }
5760
    }
5761
  else
5762
    delete_insns_since (last);
5763
 
5764
  return tem;
5765
}
5766
 
5767
/* Like emit_store_flag, but always succeeds.  */
5768
 
5769
rtx
5770
emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5771
                       enum machine_mode mode, int unsignedp, int normalizep)
5772
{
5773
  rtx tem, label;
5774
  rtx trueval, falseval;
5775
 
5776
  /* First see if emit_store_flag can do the job.  */
5777
  tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5778
  if (tem != 0)
5779
    return tem;
5780
 
5781
  if (!target)
5782
    target = gen_reg_rtx (word_mode);
5783
 
5784
  /* If this failed, we have to do this with set/compare/jump/set code.
5785
     For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5786
  trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5787
  if (code == NE
5788
      && GET_MODE_CLASS (mode) == MODE_INT
5789
      && REG_P (target)
5790
      && op0 == target
5791
      && op1 == const0_rtx)
5792
    {
5793
      label = gen_label_rtx ();
5794
      do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5795
                               mode, NULL_RTX, NULL_RTX, label, -1);
5796
      emit_move_insn (target, trueval);
5797
      emit_label (label);
5798
      return target;
5799
    }
5800
 
5801
  if (!REG_P (target)
5802
      || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5803
    target = gen_reg_rtx (GET_MODE (target));
5804
 
5805
  /* Jump in the right direction if the target cannot implement CODE
5806
     but can jump on its reverse condition.  */
5807
  falseval = const0_rtx;
5808
  if (! can_compare_p (code, mode, ccp_jump)
5809
      && (! FLOAT_MODE_P (mode)
5810
          || code == ORDERED || code == UNORDERED
5811
          || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5812
          || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5813
    {
5814
      enum rtx_code rcode;
5815
      if (FLOAT_MODE_P (mode))
5816
        rcode = reverse_condition_maybe_unordered (code);
5817
      else
5818
        rcode = reverse_condition (code);
5819
 
5820
      /* Canonicalize to UNORDERED for the libcall.  */
5821
      if (can_compare_p (rcode, mode, ccp_jump)
5822
          || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5823
        {
5824
          falseval = trueval;
5825
          trueval = const0_rtx;
5826
          code = rcode;
5827
        }
5828
    }
5829
 
5830
  emit_move_insn (target, trueval);
5831
  label = gen_label_rtx ();
5832
  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5833
                           NULL_RTX, label, -1);
5834
 
5835
  emit_move_insn (target, falseval);
5836
  emit_label (label);
5837
 
5838
  return target;
5839
}
5840
 
5841
/* Perform possibly multi-word comparison and conditional jump to LABEL
5842
   if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5843
   now a thin wrapper around do_compare_rtx_and_jump.  */
5844
 
5845
static void
5846
do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5847
                 rtx label)
5848
{
5849
  int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5850
  do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5851
                           NULL_RTX, NULL_RTX, label, -1);
5852
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.