OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-stable/] [gcc-4.5.1/] [gcc/] [expmed.c] - Blame information for rev 378

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 280 jeremybenn
/* Medium-level subroutines: convert bit-field store and extract
2
   and shifts, multiplies and divides to rtl instructions.
3
   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4
   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
5
   Free Software Foundation, Inc.
6
 
7
This file is part of GCC.
8
 
9
GCC is free software; you can redistribute it and/or modify it under
10
the terms of the GNU General Public License as published by the Free
11
Software Foundation; either version 3, or (at your option) any later
12
version.
13
 
14
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15
WARRANTY; without even the implied warranty of MERCHANTABILITY or
16
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17
for more details.
18
 
19
You should have received a copy of the GNU General Public License
20
along with GCC; see the file COPYING3.  If not see
21
<http://www.gnu.org/licenses/>.  */
22
 
23
 
24
#include "config.h"
25
#include "system.h"
26
#include "coretypes.h"
27
#include "tm.h"
28
#include "toplev.h"
29
#include "rtl.h"
30
#include "tree.h"
31
#include "tm_p.h"
32
#include "flags.h"
33
#include "insn-config.h"
34
#include "expr.h"
35
#include "optabs.h"
36
#include "real.h"
37
#include "recog.h"
38
#include "langhooks.h"
39
#include "df.h"
40
#include "target.h"
41
 
42
static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
43
                                   unsigned HOST_WIDE_INT,
44
                                   unsigned HOST_WIDE_INT, rtx);
45
static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
46
                                   unsigned HOST_WIDE_INT, rtx);
47
static rtx extract_fixed_bit_field (enum machine_mode, rtx,
48
                                    unsigned HOST_WIDE_INT,
49
                                    unsigned HOST_WIDE_INT,
50
                                    unsigned HOST_WIDE_INT, rtx, int);
51
static rtx mask_rtx (enum machine_mode, int, int, int);
52
static rtx lshift_value (enum machine_mode, rtx, int, int);
53
static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
54
                                    unsigned HOST_WIDE_INT, int);
55
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
56
static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
57
static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
58
 
59
/* Test whether a value is zero of a power of two.  */
60
#define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
61
 
62
/* Nonzero means divides or modulus operations are relatively cheap for
63
   powers of two, so don't use branches; emit the operation instead.
64
   Usually, this will mean that the MD file will emit non-branch
65
   sequences.  */
66
 
67
static bool sdiv_pow2_cheap[2][NUM_MACHINE_MODES];
68
static bool smod_pow2_cheap[2][NUM_MACHINE_MODES];
69
 
70
#ifndef SLOW_UNALIGNED_ACCESS
71
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
72
#endif
73
 
74
/* For compilers that support multiple targets with different word sizes,
75
   MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
76
   is the H8/300(H) compiler.  */
77
 
78
#ifndef MAX_BITS_PER_WORD
79
#define MAX_BITS_PER_WORD BITS_PER_WORD
80
#endif
81
 
82
/* Reduce conditional compilation elsewhere.  */
83
#ifndef HAVE_insv
84
#define HAVE_insv       0
85
#define CODE_FOR_insv   CODE_FOR_nothing
86
#define gen_insv(a,b,c,d) NULL_RTX
87
#endif
88
#ifndef HAVE_extv
89
#define HAVE_extv       0
90
#define CODE_FOR_extv   CODE_FOR_nothing
91
#define gen_extv(a,b,c,d) NULL_RTX
92
#endif
93
#ifndef HAVE_extzv
94
#define HAVE_extzv      0
95
#define CODE_FOR_extzv  CODE_FOR_nothing
96
#define gen_extzv(a,b,c,d) NULL_RTX
97
#endif
98
 
99
/* Cost of various pieces of RTL.  Note that some of these are indexed by
100
   shift count and some by mode.  */
101
static int zero_cost[2];
102
static int add_cost[2][NUM_MACHINE_MODES];
103
static int neg_cost[2][NUM_MACHINE_MODES];
104
static int shift_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
105
static int shiftadd_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
106
static int shiftsub0_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
107
static int shiftsub1_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
108
static int mul_cost[2][NUM_MACHINE_MODES];
109
static int sdiv_cost[2][NUM_MACHINE_MODES];
110
static int udiv_cost[2][NUM_MACHINE_MODES];
111
static int mul_widen_cost[2][NUM_MACHINE_MODES];
112
static int mul_highpart_cost[2][NUM_MACHINE_MODES];
113
 
114
void
115
init_expmed (void)
116
{
117
  struct
118
  {
119
    struct rtx_def reg;         rtunion reg_fld[2];
120
    struct rtx_def plus;        rtunion plus_fld1;
121
    struct rtx_def neg;
122
    struct rtx_def mult;        rtunion mult_fld1;
123
    struct rtx_def sdiv;        rtunion sdiv_fld1;
124
    struct rtx_def udiv;        rtunion udiv_fld1;
125
    struct rtx_def zext;
126
    struct rtx_def sdiv_32;     rtunion sdiv_32_fld1;
127
    struct rtx_def smod_32;     rtunion smod_32_fld1;
128
    struct rtx_def wide_mult;   rtunion wide_mult_fld1;
129
    struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
130
    struct rtx_def wide_trunc;
131
    struct rtx_def shift;       rtunion shift_fld1;
132
    struct rtx_def shift_mult;  rtunion shift_mult_fld1;
133
    struct rtx_def shift_add;   rtunion shift_add_fld1;
134
    struct rtx_def shift_sub0;  rtunion shift_sub0_fld1;
135
    struct rtx_def shift_sub1;  rtunion shift_sub1_fld1;
136
  } all;
137
 
138
  rtx pow2[MAX_BITS_PER_WORD];
139
  rtx cint[MAX_BITS_PER_WORD];
140
  int m, n;
141
  enum machine_mode mode, wider_mode;
142
  int speed;
143
 
144
 
145
  for (m = 1; m < MAX_BITS_PER_WORD; m++)
146
    {
147
      pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
148
      cint[m] = GEN_INT (m);
149
    }
150
  memset (&all, 0, sizeof all);
151
 
152
  PUT_CODE (&all.reg, REG);
153
  /* Avoid using hard regs in ways which may be unsupported.  */
154
  SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
155
 
156
  PUT_CODE (&all.plus, PLUS);
157
  XEXP (&all.plus, 0) = &all.reg;
158
  XEXP (&all.plus, 1) = &all.reg;
159
 
160
  PUT_CODE (&all.neg, NEG);
161
  XEXP (&all.neg, 0) = &all.reg;
162
 
163
  PUT_CODE (&all.mult, MULT);
164
  XEXP (&all.mult, 0) = &all.reg;
165
  XEXP (&all.mult, 1) = &all.reg;
166
 
167
  PUT_CODE (&all.sdiv, DIV);
168
  XEXP (&all.sdiv, 0) = &all.reg;
169
  XEXP (&all.sdiv, 1) = &all.reg;
170
 
171
  PUT_CODE (&all.udiv, UDIV);
172
  XEXP (&all.udiv, 0) = &all.reg;
173
  XEXP (&all.udiv, 1) = &all.reg;
174
 
175
  PUT_CODE (&all.sdiv_32, DIV);
176
  XEXP (&all.sdiv_32, 0) = &all.reg;
177
  XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
178
 
179
  PUT_CODE (&all.smod_32, MOD);
180
  XEXP (&all.smod_32, 0) = &all.reg;
181
  XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
182
 
183
  PUT_CODE (&all.zext, ZERO_EXTEND);
184
  XEXP (&all.zext, 0) = &all.reg;
185
 
186
  PUT_CODE (&all.wide_mult, MULT);
187
  XEXP (&all.wide_mult, 0) = &all.zext;
188
  XEXP (&all.wide_mult, 1) = &all.zext;
189
 
190
  PUT_CODE (&all.wide_lshr, LSHIFTRT);
191
  XEXP (&all.wide_lshr, 0) = &all.wide_mult;
192
 
193
  PUT_CODE (&all.wide_trunc, TRUNCATE);
194
  XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
195
 
196
  PUT_CODE (&all.shift, ASHIFT);
197
  XEXP (&all.shift, 0) = &all.reg;
198
 
199
  PUT_CODE (&all.shift_mult, MULT);
200
  XEXP (&all.shift_mult, 0) = &all.reg;
201
 
202
  PUT_CODE (&all.shift_add, PLUS);
203
  XEXP (&all.shift_add, 0) = &all.shift_mult;
204
  XEXP (&all.shift_add, 1) = &all.reg;
205
 
206
  PUT_CODE (&all.shift_sub0, MINUS);
207
  XEXP (&all.shift_sub0, 0) = &all.shift_mult;
208
  XEXP (&all.shift_sub0, 1) = &all.reg;
209
 
210
  PUT_CODE (&all.shift_sub1, MINUS);
211
  XEXP (&all.shift_sub1, 0) = &all.reg;
212
  XEXP (&all.shift_sub1, 1) = &all.shift_mult;
213
 
214
  for (speed = 0; speed < 2; speed++)
215
    {
216
      crtl->maybe_hot_insn_p = speed;
217
      zero_cost[speed] = rtx_cost (const0_rtx, SET, speed);
218
 
219
      for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
220
           mode != VOIDmode;
221
           mode = GET_MODE_WIDER_MODE (mode))
222
        {
223
          PUT_MODE (&all.reg, mode);
224
          PUT_MODE (&all.plus, mode);
225
          PUT_MODE (&all.neg, mode);
226
          PUT_MODE (&all.mult, mode);
227
          PUT_MODE (&all.sdiv, mode);
228
          PUT_MODE (&all.udiv, mode);
229
          PUT_MODE (&all.sdiv_32, mode);
230
          PUT_MODE (&all.smod_32, mode);
231
          PUT_MODE (&all.wide_trunc, mode);
232
          PUT_MODE (&all.shift, mode);
233
          PUT_MODE (&all.shift_mult, mode);
234
          PUT_MODE (&all.shift_add, mode);
235
          PUT_MODE (&all.shift_sub0, mode);
236
          PUT_MODE (&all.shift_sub1, mode);
237
 
238
          add_cost[speed][mode] = rtx_cost (&all.plus, SET, speed);
239
          neg_cost[speed][mode] = rtx_cost (&all.neg, SET, speed);
240
          mul_cost[speed][mode] = rtx_cost (&all.mult, SET, speed);
241
          sdiv_cost[speed][mode] = rtx_cost (&all.sdiv, SET, speed);
242
          udiv_cost[speed][mode] = rtx_cost (&all.udiv, SET, speed);
243
 
244
          sdiv_pow2_cheap[speed][mode] = (rtx_cost (&all.sdiv_32, SET, speed)
245
                                          <= 2 * add_cost[speed][mode]);
246
          smod_pow2_cheap[speed][mode] = (rtx_cost (&all.smod_32, SET, speed)
247
                                          <= 4 * add_cost[speed][mode]);
248
 
249
          wider_mode = GET_MODE_WIDER_MODE (mode);
250
          if (wider_mode != VOIDmode)
251
            {
252
              PUT_MODE (&all.zext, wider_mode);
253
              PUT_MODE (&all.wide_mult, wider_mode);
254
              PUT_MODE (&all.wide_lshr, wider_mode);
255
              XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
256
 
257
              mul_widen_cost[speed][wider_mode]
258
                = rtx_cost (&all.wide_mult, SET, speed);
259
              mul_highpart_cost[speed][mode]
260
                = rtx_cost (&all.wide_trunc, SET, speed);
261
            }
262
 
263
          shift_cost[speed][mode][0] = 0;
264
          shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
265
            = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];
266
 
267
          n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
268
          for (m = 1; m < n; m++)
269
            {
270
              XEXP (&all.shift, 1) = cint[m];
271
              XEXP (&all.shift_mult, 1) = pow2[m];
272
 
273
              shift_cost[speed][mode][m] = rtx_cost (&all.shift, SET, speed);
274
              shiftadd_cost[speed][mode][m] = rtx_cost (&all.shift_add, SET, speed);
275
              shiftsub0_cost[speed][mode][m] = rtx_cost (&all.shift_sub0, SET, speed);
276
              shiftsub1_cost[speed][mode][m] = rtx_cost (&all.shift_sub1, SET, speed);
277
            }
278
        }
279
    }
280
  default_rtl_profile ();
281
}
282
 
283
/* Return an rtx representing minus the value of X.
284
   MODE is the intended mode of the result,
285
   useful if X is a CONST_INT.  */
286
 
287
rtx
288
negate_rtx (enum machine_mode mode, rtx x)
289
{
290
  rtx result = simplify_unary_operation (NEG, mode, x, mode);
291
 
292
  if (result == 0)
293
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
294
 
295
  return result;
296
}
297
 
298
/* Report on the availability of insv/extv/extzv and the desired mode
299
   of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
300
   is false; else the mode of the specified operand.  If OPNO is -1,
301
   all the caller cares about is whether the insn is available.  */
302
enum machine_mode
303
mode_for_extraction (enum extraction_pattern pattern, int opno)
304
{
305
  const struct insn_data *data;
306
 
307
  switch (pattern)
308
    {
309
    case EP_insv:
310
      if (HAVE_insv)
311
        {
312
          data = &insn_data[CODE_FOR_insv];
313
          break;
314
        }
315
      return MAX_MACHINE_MODE;
316
 
317
    case EP_extv:
318
      if (HAVE_extv)
319
        {
320
          data = &insn_data[CODE_FOR_extv];
321
          break;
322
        }
323
      return MAX_MACHINE_MODE;
324
 
325
    case EP_extzv:
326
      if (HAVE_extzv)
327
        {
328
          data = &insn_data[CODE_FOR_extzv];
329
          break;
330
        }
331
      return MAX_MACHINE_MODE;
332
 
333
    default:
334
      gcc_unreachable ();
335
    }
336
 
337
  if (opno == -1)
338
    return VOIDmode;
339
 
340
  /* Everyone who uses this function used to follow it with
341
     if (result == VOIDmode) result = word_mode; */
342
  if (data->operand[opno].mode == VOIDmode)
343
    return word_mode;
344
  return data->operand[opno].mode;
345
}
346
 
347
/* Return true if X, of mode MODE, matches the predicate for operand
348
   OPNO of instruction ICODE.  Allow volatile memories, regardless of
349
   the ambient volatile_ok setting.  */
350
 
351
static bool
352
check_predicate_volatile_ok (enum insn_code icode, int opno,
353
                             rtx x, enum machine_mode mode)
354
{
355
  bool save_volatile_ok, result;
356
 
357
  save_volatile_ok = volatile_ok;
358
  result = insn_data[(int) icode].operand[opno].predicate (x, mode);
359
  volatile_ok = save_volatile_ok;
360
  return result;
361
}
362
 
363
/* A subroutine of store_bit_field, with the same arguments.  Return true
364
   if the operation could be implemented.
365
 
366
   If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
367
   no other way of implementing the operation.  If FALLBACK_P is false,
368
   return false instead.  */
369
 
370
static bool
371
store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
372
                   unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
373
                   rtx value, bool fallback_p)
374
{
375
  unsigned int unit
376
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
377
  unsigned HOST_WIDE_INT offset, bitpos;
378
  rtx op0 = str_rtx;
379
  int byte_offset;
380
  rtx orig_value;
381
 
382
  enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
383
 
384
  while (GET_CODE (op0) == SUBREG)
385
    {
386
      /* The following line once was done only if WORDS_BIG_ENDIAN,
387
         but I think that is a mistake.  WORDS_BIG_ENDIAN is
388
         meaningful at a much higher level; when structures are copied
389
         between memory and regs, the higher-numbered regs
390
         always get higher addresses.  */
391
      int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
392
      int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
393
 
394
      byte_offset = 0;
395
 
396
      /* Paradoxical subregs need special handling on big endian machines.  */
397
      if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
398
        {
399
          int difference = inner_mode_size - outer_mode_size;
400
 
401
          if (WORDS_BIG_ENDIAN)
402
            byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
403
          if (BYTES_BIG_ENDIAN)
404
            byte_offset += difference % UNITS_PER_WORD;
405
        }
406
      else
407
        byte_offset = SUBREG_BYTE (op0);
408
 
409
      bitnum += byte_offset * BITS_PER_UNIT;
410
      op0 = SUBREG_REG (op0);
411
    }
412
 
413
  /* No action is needed if the target is a register and if the field
414
     lies completely outside that register.  This can occur if the source
415
     code contains an out-of-bounds access to a small array.  */
416
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
417
    return true;
418
 
419
  /* Use vec_set patterns for inserting parts of vectors whenever
420
     available.  */
421
  if (VECTOR_MODE_P (GET_MODE (op0))
422
      && !MEM_P (op0)
423
      && (optab_handler (vec_set_optab, GET_MODE (op0))->insn_code
424
          != CODE_FOR_nothing)
425
      && fieldmode == GET_MODE_INNER (GET_MODE (op0))
426
      && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
427
      && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
428
    {
429
      enum machine_mode outermode = GET_MODE (op0);
430
      enum machine_mode innermode = GET_MODE_INNER (outermode);
431
      int icode = (int) optab_handler (vec_set_optab, outermode)->insn_code;
432
      int pos = bitnum / GET_MODE_BITSIZE (innermode);
433
      rtx rtxpos = GEN_INT (pos);
434
      rtx src = value;
435
      rtx dest = op0;
436
      rtx pat, seq;
437
      enum machine_mode mode0 = insn_data[icode].operand[0].mode;
438
      enum machine_mode mode1 = insn_data[icode].operand[1].mode;
439
      enum machine_mode mode2 = insn_data[icode].operand[2].mode;
440
 
441
      start_sequence ();
442
 
443
      if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
444
        src = copy_to_mode_reg (mode1, src);
445
 
446
      if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
447
        rtxpos = copy_to_mode_reg (mode1, rtxpos);
448
 
449
      /* We could handle this, but we should always be called with a pseudo
450
         for our targets and all insns should take them as outputs.  */
451
      gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
452
                  && (*insn_data[icode].operand[1].predicate) (src, mode1)
453
                  && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
454
      pat = GEN_FCN (icode) (dest, src, rtxpos);
455
      seq = get_insns ();
456
      end_sequence ();
457
      if (pat)
458
        {
459
          emit_insn (seq);
460
          emit_insn (pat);
461
          return true;
462
        }
463
    }
464
 
465
  /* If the target is a register, overwriting the entire object, or storing
466
     a full-word or multi-word field can be done with just a SUBREG.
467
 
468
     If the target is memory, storing any naturally aligned field can be
469
     done with a simple store.  For targets that support fast unaligned
470
     memory, any naturally sized, unit aligned field can be done directly.  */
471
 
472
  offset = bitnum / unit;
473
  bitpos = bitnum % unit;
474
  byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
475
                + (offset * UNITS_PER_WORD);
476
 
477
  if (bitpos == 0
478
      && bitsize == GET_MODE_BITSIZE (fieldmode)
479
      && (!MEM_P (op0)
480
          ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
481
             || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
482
             && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
483
          : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
484
             || (offset * BITS_PER_UNIT % bitsize == 0
485
                 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
486
    {
487
      if (MEM_P (op0))
488
        op0 = adjust_address (op0, fieldmode, offset);
489
      else if (GET_MODE (op0) != fieldmode)
490
        op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
491
                                   byte_offset);
492
      emit_move_insn (op0, value);
493
      return true;
494
    }
495
 
496
  /* Make sure we are playing with integral modes.  Pun with subregs
497
     if we aren't.  This must come after the entire register case above,
498
     since that case is valid for any mode.  The following cases are only
499
     valid for integral modes.  */
500
  {
501
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
502
    if (imode != GET_MODE (op0))
503
      {
504
        if (MEM_P (op0))
505
          op0 = adjust_address (op0, imode, 0);
506
        else
507
          {
508
            gcc_assert (imode != BLKmode);
509
            op0 = gen_lowpart (imode, op0);
510
          }
511
      }
512
  }
513
 
514
  /* We may be accessing data outside the field, which means
515
     we can alias adjacent data.  */
516
  if (MEM_P (op0))
517
    {
518
      op0 = shallow_copy_rtx (op0);
519
      set_mem_alias_set (op0, 0);
520
      set_mem_expr (op0, 0);
521
    }
522
 
523
  /* If OP0 is a register, BITPOS must count within a word.
524
     But as we have it, it counts within whatever size OP0 now has.
525
     On a bigendian machine, these are not the same, so convert.  */
526
  if (BYTES_BIG_ENDIAN
527
      && !MEM_P (op0)
528
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
529
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
530
 
531
  /* Storing an lsb-aligned field in a register
532
     can be done with a movestrict instruction.  */
533
 
534
  if (!MEM_P (op0)
535
      && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
536
      && bitsize == GET_MODE_BITSIZE (fieldmode)
537
      && (optab_handler (movstrict_optab, fieldmode)->insn_code
538
          != CODE_FOR_nothing))
539
    {
540
      int icode = optab_handler (movstrict_optab, fieldmode)->insn_code;
541
      rtx insn;
542
      rtx start = get_last_insn ();
543
      rtx arg0 = op0;
544
 
545
      /* Get appropriate low part of the value being stored.  */
546
      if (CONST_INT_P (value) || REG_P (value))
547
        value = gen_lowpart (fieldmode, value);
548
      else if (!(GET_CODE (value) == SYMBOL_REF
549
                 || GET_CODE (value) == LABEL_REF
550
                 || GET_CODE (value) == CONST))
551
        value = convert_to_mode (fieldmode, value, 0);
552
 
553
      if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
554
        value = copy_to_mode_reg (fieldmode, value);
555
 
556
      if (GET_CODE (op0) == SUBREG)
557
        {
558
          /* Else we've got some float mode source being extracted into
559
             a different float mode destination -- this combination of
560
             subregs results in Severe Tire Damage.  */
561
          gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
562
                      || GET_MODE_CLASS (fieldmode) == MODE_INT
563
                      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
564
          arg0 = SUBREG_REG (op0);
565
        }
566
 
567
      insn = (GEN_FCN (icode)
568
                 (gen_rtx_SUBREG (fieldmode, arg0,
569
                                  (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
570
                                  + (offset * UNITS_PER_WORD)),
571
                                  value));
572
      if (insn)
573
        {
574
          emit_insn (insn);
575
          return true;
576
        }
577
      delete_insns_since (start);
578
    }
579
 
580
  /* Handle fields bigger than a word.  */
581
 
582
  if (bitsize > BITS_PER_WORD)
583
    {
584
      /* Here we transfer the words of the field
585
         in the order least significant first.
586
         This is because the most significant word is the one which may
587
         be less than full.
588
         However, only do that if the value is not BLKmode.  */
589
 
590
      unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
591
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
592
      unsigned int i;
593
      rtx last;
594
 
595
      /* This is the mode we must force value to, so that there will be enough
596
         subwords to extract.  Note that fieldmode will often (always?) be
597
         VOIDmode, because that is what store_field uses to indicate that this
598
         is a bit field, but passing VOIDmode to operand_subword_force
599
         is not allowed.  */
600
      fieldmode = GET_MODE (value);
601
      if (fieldmode == VOIDmode)
602
        fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
603
 
604
      last = get_last_insn ();
605
      for (i = 0; i < nwords; i++)
606
        {
607
          /* If I is 0, use the low-order word in both field and target;
608
             if I is 1, use the next to lowest word; and so on.  */
609
          unsigned int wordnum = (backwards ? nwords - i - 1 : i);
610
          unsigned int bit_offset = (backwards
611
                                     ? MAX ((int) bitsize - ((int) i + 1)
612
                                            * BITS_PER_WORD,
613
                                            0)
614
                                     : (int) i * BITS_PER_WORD);
615
          rtx value_word = operand_subword_force (value, wordnum, fieldmode);
616
 
617
          if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD,
618
                                            bitsize - i * BITS_PER_WORD),
619
                                  bitnum + bit_offset, word_mode,
620
                                  value_word, fallback_p))
621
            {
622
              delete_insns_since (last);
623
              return false;
624
            }
625
        }
626
      return true;
627
    }
628
 
629
  /* From here on we can assume that the field to be stored in is
630
     a full-word (whatever type that is), since it is shorter than a word.  */
631
 
632
  /* OFFSET is the number of words or bytes (UNIT says which)
633
     from STR_RTX to the first word or byte containing part of the field.  */
634
 
635
  if (!MEM_P (op0))
636
    {
637
      if (offset != 0
638
          || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
639
        {
640
          if (!REG_P (op0))
641
            {
642
              /* Since this is a destination (lvalue), we can't copy
643
                 it to a pseudo.  We can remove a SUBREG that does not
644
                 change the size of the operand.  Such a SUBREG may
645
                 have been added above.  */
646
              gcc_assert (GET_CODE (op0) == SUBREG
647
                          && (GET_MODE_SIZE (GET_MODE (op0))
648
                              == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
649
              op0 = SUBREG_REG (op0);
650
            }
651
          op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
652
                                op0, (offset * UNITS_PER_WORD));
653
        }
654
      offset = 0;
655
    }
656
 
657
  /* If VALUE has a floating-point or complex mode, access it as an
658
     integer of the corresponding size.  This can occur on a machine
659
     with 64 bit registers that uses SFmode for float.  It can also
660
     occur for unaligned float or complex fields.  */
661
  orig_value = value;
662
  if (GET_MODE (value) != VOIDmode
663
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
664
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
665
    {
666
      value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
667
      emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
668
    }
669
 
670
  /* Now OFFSET is nonzero only if OP0 is memory
671
     and is therefore always measured in bytes.  */
672
 
673
  if (HAVE_insv
674
      && GET_MODE (value) != BLKmode
675
      && bitsize > 0
676
      && GET_MODE_BITSIZE (op_mode) >= bitsize
677
      && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
678
            && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
679
      && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
680
                                                        VOIDmode)
681
      && check_predicate_volatile_ok (CODE_FOR_insv, 0, op0, VOIDmode))
682
    {
683
      int xbitpos = bitpos;
684
      rtx value1;
685
      rtx xop0 = op0;
686
      rtx last = get_last_insn ();
687
      rtx pat;
688
      bool copy_back = false;
689
 
690
      /* Add OFFSET into OP0's address.  */
691
      if (MEM_P (xop0))
692
        xop0 = adjust_address (xop0, byte_mode, offset);
693
 
694
      /* If xop0 is a register, we need it in OP_MODE
695
         to make it acceptable to the format of insv.  */
696
      if (GET_CODE (xop0) == SUBREG)
697
        /* We can't just change the mode, because this might clobber op0,
698
           and we will need the original value of op0 if insv fails.  */
699
        xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
700
      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
701
        xop0 = gen_lowpart_SUBREG (op_mode, xop0);
702
 
703
      /* If the destination is a paradoxical subreg such that we need a
704
         truncate to the inner mode, perform the insertion on a temporary and
705
         truncate the result to the original destination.  Note that we can't
706
         just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
707
         X) 0)) is (reg:N X).  */
708
      if (GET_CODE (xop0) == SUBREG
709
          && REG_P (SUBREG_REG (xop0))
710
          && (!TRULY_NOOP_TRUNCATION
711
              (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (xop0))),
712
               GET_MODE_BITSIZE (op_mode))))
713
        {
714
          rtx tem = gen_reg_rtx (op_mode);
715
          emit_move_insn (tem, xop0);
716
          xop0 = tem;
717
          copy_back = true;
718
        }
719
 
720
      /* On big-endian machines, we count bits from the most significant.
721
         If the bit field insn does not, we must invert.  */
722
 
723
      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
724
        xbitpos = unit - bitsize - xbitpos;
725
 
726
      /* We have been counting XBITPOS within UNIT.
727
         Count instead within the size of the register.  */
728
      if (BITS_BIG_ENDIAN && !MEM_P (xop0))
729
        xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
730
 
731
      unit = GET_MODE_BITSIZE (op_mode);
732
 
733
      /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
734
      value1 = value;
735
      if (GET_MODE (value) != op_mode)
736
        {
737
          if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
738
            {
739
              /* Optimization: Don't bother really extending VALUE
740
                 if it has all the bits we will actually use.  However,
741
                 if we must narrow it, be sure we do it correctly.  */
742
 
743
              if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
744
                {
745
                  rtx tmp;
746
 
747
                  tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
748
                  if (! tmp)
749
                    tmp = simplify_gen_subreg (op_mode,
750
                                               force_reg (GET_MODE (value),
751
                                                          value1),
752
                                               GET_MODE (value), 0);
753
                  value1 = tmp;
754
                }
755
              else
756
                value1 = gen_lowpart (op_mode, value1);
757
            }
758
          else if (CONST_INT_P (value))
759
            value1 = gen_int_mode (INTVAL (value), op_mode);
760
          else
761
            /* Parse phase is supposed to make VALUE's data type
762
               match that of the component reference, which is a type
763
               at least as wide as the field; so VALUE should have
764
               a mode that corresponds to that type.  */
765
            gcc_assert (CONSTANT_P (value));
766
        }
767
 
768
      /* If this machine's insv insists on a register,
769
         get VALUE1 into a register.  */
770
      if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
771
             (value1, op_mode)))
772
        value1 = force_reg (op_mode, value1);
773
 
774
      pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
775
      if (pat)
776
        {
777
          emit_insn (pat);
778
 
779
          if (copy_back)
780
            convert_move (op0, xop0, true);
781
          return true;
782
        }
783
      delete_insns_since (last);
784
    }
785
 
786
  /* If OP0 is a memory, try copying it to a register and seeing if a
787
     cheap register alternative is available.  */
788
  if (HAVE_insv && MEM_P (op0))
789
    {
790
      enum machine_mode bestmode;
791
 
792
      /* Get the mode to use for inserting into this field.  If OP0 is
793
         BLKmode, get the smallest mode consistent with the alignment. If
794
         OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
795
         mode. Otherwise, use the smallest mode containing the field.  */
796
 
797
      if (GET_MODE (op0) == BLKmode
798
          || (op_mode != MAX_MACHINE_MODE
799
              && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
800
        bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
801
                                  (op_mode == MAX_MACHINE_MODE
802
                                   ? VOIDmode : op_mode),
803
                                  MEM_VOLATILE_P (op0));
804
      else
805
        bestmode = GET_MODE (op0);
806
 
807
      if (bestmode != VOIDmode
808
          && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
809
          && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
810
               && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
811
        {
812
          rtx last, tempreg, xop0;
813
          unsigned HOST_WIDE_INT xoffset, xbitpos;
814
 
815
          last = get_last_insn ();
816
 
817
          /* Adjust address to point to the containing unit of
818
             that mode.  Compute the offset as a multiple of this unit,
819
             counting in bytes.  */
820
          unit = GET_MODE_BITSIZE (bestmode);
821
          xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
822
          xbitpos = bitnum % unit;
823
          xop0 = adjust_address (op0, bestmode, xoffset);
824
 
825
          /* Fetch that unit, store the bitfield in it, then store
826
             the unit.  */
827
          tempreg = copy_to_reg (xop0);
828
          if (store_bit_field_1 (tempreg, bitsize, xbitpos,
829
                                 fieldmode, orig_value, false))
830
            {
831
              emit_move_insn (xop0, tempreg);
832
              return true;
833
            }
834
          delete_insns_since (last);
835
        }
836
    }
837
 
838
  if (!fallback_p)
839
    return false;
840
 
841
  store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
842
  return true;
843
}
844
 
845
/* Generate code to store value from rtx VALUE
846
   into a bit-field within structure STR_RTX
847
   containing BITSIZE bits starting at bit BITNUM.
848
   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */
849
 
850
void
851
store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
852
                 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
853
                 rtx value)
854
{
855
  if (!store_bit_field_1 (str_rtx, bitsize, bitnum, fieldmode, value, true))
856
    gcc_unreachable ();
857
}
858
 
859
/* Use shifts and boolean operations to store VALUE
860
   into a bit field of width BITSIZE
861
   in a memory location specified by OP0 except offset by OFFSET bytes.
862
     (OFFSET must be 0 if OP0 is a register.)
863
   The field starts at position BITPOS within the byte.
864
    (If OP0 is a register, it may be a full word or a narrower mode,
865
     but BITPOS still counts within a full word,
866
     which is significant on bigendian machines.)  */
867
 
868
static void
869
store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
870
                       unsigned HOST_WIDE_INT bitsize,
871
                       unsigned HOST_WIDE_INT bitpos, rtx value)
872
{
873
  enum machine_mode mode;
874
  unsigned int total_bits = BITS_PER_WORD;
875
  rtx temp;
876
  int all_zero = 0;
877
  int all_one = 0;
878
 
879
  /* There is a case not handled here:
880
     a structure with a known alignment of just a halfword
881
     and a field split across two aligned halfwords within the structure.
882
     Or likewise a structure with a known alignment of just a byte
883
     and a field split across two bytes.
884
     Such cases are not supposed to be able to occur.  */
885
 
886
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
887
    {
888
      gcc_assert (!offset);
889
      /* Special treatment for a bit field split across two registers.  */
890
      if (bitsize + bitpos > BITS_PER_WORD)
891
        {
892
          store_split_bit_field (op0, bitsize, bitpos, value);
893
          return;
894
        }
895
    }
896
  else
897
    {
898
      /* Get the proper mode to use for this field.  We want a mode that
899
         includes the entire field.  If such a mode would be larger than
900
         a word, we won't be doing the extraction the normal way.
901
         We don't want a mode bigger than the destination.  */
902
 
903
      mode = GET_MODE (op0);
904
      if (GET_MODE_BITSIZE (mode) == 0
905
          || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
906
        mode = word_mode;
907
      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
908
                            MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
909
 
910
      if (mode == VOIDmode)
911
        {
912
          /* The only way this should occur is if the field spans word
913
             boundaries.  */
914
          store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
915
                                 value);
916
          return;
917
        }
918
 
919
      total_bits = GET_MODE_BITSIZE (mode);
920
 
921
      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
922
         be in the range 0 to total_bits-1, and put any excess bytes in
923
         OFFSET.  */
924
      if (bitpos >= total_bits)
925
        {
926
          offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
927
          bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
928
                     * BITS_PER_UNIT);
929
        }
930
 
931
      /* Get ref to an aligned byte, halfword, or word containing the field.
932
         Adjust BITPOS to be position within a word,
933
         and OFFSET to be the offset of that word.
934
         Then alter OP0 to refer to that word.  */
935
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
936
      offset -= (offset % (total_bits / BITS_PER_UNIT));
937
      op0 = adjust_address (op0, mode, offset);
938
    }
939
 
940
  mode = GET_MODE (op0);
941
 
942
  /* Now MODE is either some integral mode for a MEM as OP0,
943
     or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
944
     The bit field is contained entirely within OP0.
945
     BITPOS is the starting bit number within OP0.
946
     (OP0's mode may actually be narrower than MODE.)  */
947
 
948
  if (BYTES_BIG_ENDIAN)
949
      /* BITPOS is the distance between our msb
950
         and that of the containing datum.
951
         Convert it to the distance from the lsb.  */
952
      bitpos = total_bits - bitsize - bitpos;
953
 
954
  /* Now BITPOS is always the distance between our lsb
955
     and that of OP0.  */
956
 
957
  /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
958
     we must first convert its mode to MODE.  */
959
 
960
  if (CONST_INT_P (value))
961
    {
962
      HOST_WIDE_INT v = INTVAL (value);
963
 
964
      if (bitsize < HOST_BITS_PER_WIDE_INT)
965
        v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
966
 
967
      if (v == 0)
968
        all_zero = 1;
969
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
970
                && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
971
               || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
972
        all_one = 1;
973
 
974
      value = lshift_value (mode, value, bitpos, bitsize);
975
    }
976
  else
977
    {
978
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
979
                      && bitpos + bitsize != GET_MODE_BITSIZE (mode));
980
 
981
      if (GET_MODE (value) != mode)
982
        value = convert_to_mode (mode, value, 1);
983
 
984
      if (must_and)
985
        value = expand_binop (mode, and_optab, value,
986
                              mask_rtx (mode, 0, bitsize, 0),
987
                              NULL_RTX, 1, OPTAB_LIB_WIDEN);
988
      if (bitpos > 0)
989
        value = expand_shift (LSHIFT_EXPR, mode, value,
990
                              build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
991
    }
992
 
993
  /* Now clear the chosen bits in OP0,
994
     except that if VALUE is -1 we need not bother.  */
995
  /* We keep the intermediates in registers to allow CSE to combine
996
     consecutive bitfield assignments.  */
997
 
998
  temp = force_reg (mode, op0);
999
 
1000
  if (! all_one)
1001
    {
1002
      temp = expand_binop (mode, and_optab, temp,
1003
                           mask_rtx (mode, bitpos, bitsize, 1),
1004
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
1005
      temp = force_reg (mode, temp);
1006
    }
1007
 
1008
  /* Now logical-or VALUE into OP0, unless it is zero.  */
1009
 
1010
  if (! all_zero)
1011
    {
1012
      temp = expand_binop (mode, ior_optab, temp, value,
1013
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
1014
      temp = force_reg (mode, temp);
1015
    }
1016
 
1017
  if (op0 != temp)
1018
    {
1019
      op0 = copy_rtx (op0);
1020
      emit_move_insn (op0, temp);
1021
    }
1022
}
1023
 
1024
/* Store a bit field that is split across multiple accessible memory objects.
1025
 
1026
   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1027
   BITSIZE is the field width; BITPOS the position of its first bit
1028
   (within the word).
1029
   VALUE is the value to store.
1030
 
1031
   This does not yet handle fields wider than BITS_PER_WORD.  */
1032
 
1033
static void
1034
store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1035
                       unsigned HOST_WIDE_INT bitpos, rtx value)
1036
{
1037
  unsigned int unit;
1038
  unsigned int bitsdone = 0;
1039
 
1040
  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1041
     much at a time.  */
1042
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1043
    unit = BITS_PER_WORD;
1044
  else
1045
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1046
 
1047
  /* If VALUE is a constant other than a CONST_INT, get it into a register in
1048
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1049
     that VALUE might be a floating-point constant.  */
1050
  if (CONSTANT_P (value) && !CONST_INT_P (value))
1051
    {
1052
      rtx word = gen_lowpart_common (word_mode, value);
1053
 
1054
      if (word && (value != word))
1055
        value = word;
1056
      else
1057
        value = gen_lowpart_common (word_mode,
1058
                                    force_reg (GET_MODE (value) != VOIDmode
1059
                                               ? GET_MODE (value)
1060
                                               : word_mode, value));
1061
    }
1062
 
1063
  while (bitsdone < bitsize)
1064
    {
1065
      unsigned HOST_WIDE_INT thissize;
1066
      rtx part, word;
1067
      unsigned HOST_WIDE_INT thispos;
1068
      unsigned HOST_WIDE_INT offset;
1069
 
1070
      offset = (bitpos + bitsdone) / unit;
1071
      thispos = (bitpos + bitsdone) % unit;
1072
 
1073
      /* THISSIZE must not overrun a word boundary.  Otherwise,
1074
         store_fixed_bit_field will call us again, and we will mutually
1075
         recurse forever.  */
1076
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1077
      thissize = MIN (thissize, unit - thispos);
1078
 
1079
      if (BYTES_BIG_ENDIAN)
1080
        {
1081
          int total_bits;
1082
 
1083
          /* We must do an endian conversion exactly the same way as it is
1084
             done in extract_bit_field, so that the two calls to
1085
             extract_fixed_bit_field will have comparable arguments.  */
1086
          if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1087
            total_bits = BITS_PER_WORD;
1088
          else
1089
            total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1090
 
1091
          /* Fetch successively less significant portions.  */
1092
          if (CONST_INT_P (value))
1093
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1094
                             >> (bitsize - bitsdone - thissize))
1095
                            & (((HOST_WIDE_INT) 1 << thissize) - 1));
1096
          else
1097
            /* The args are chosen so that the last part includes the
1098
               lsb.  Give extract_bit_field the value it needs (with
1099
               endianness compensation) to fetch the piece we want.  */
1100
            part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1101
                                            total_bits - bitsize + bitsdone,
1102
                                            NULL_RTX, 1);
1103
        }
1104
      else
1105
        {
1106
          /* Fetch successively more significant portions.  */
1107
          if (CONST_INT_P (value))
1108
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1109
                             >> bitsdone)
1110
                            & (((HOST_WIDE_INT) 1 << thissize) - 1));
1111
          else
1112
            part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1113
                                            bitsdone, NULL_RTX, 1);
1114
        }
1115
 
1116
      /* If OP0 is a register, then handle OFFSET here.
1117
 
1118
         When handling multiword bitfields, extract_bit_field may pass
1119
         down a word_mode SUBREG of a larger REG for a bitfield that actually
1120
         crosses a word boundary.  Thus, for a SUBREG, we must find
1121
         the current word starting from the base register.  */
1122
      if (GET_CODE (op0) == SUBREG)
1123
        {
1124
          int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1125
          word = operand_subword_force (SUBREG_REG (op0), word_offset,
1126
                                        GET_MODE (SUBREG_REG (op0)));
1127
          offset = 0;
1128
        }
1129
      else if (REG_P (op0))
1130
        {
1131
          word = operand_subword_force (op0, offset, GET_MODE (op0));
1132
          offset = 0;
1133
        }
1134
      else
1135
        word = op0;
1136
 
1137
      /* OFFSET is in UNITs, and UNIT is in bits.
1138
         store_fixed_bit_field wants offset in bytes.  */
1139
      store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1140
                             thispos, part);
1141
      bitsdone += thissize;
1142
    }
1143
}
1144
 
1145
/* A subroutine of extract_bit_field_1 that converts return value X
1146
   to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1147
   to extract_bit_field.  */
1148
 
1149
static rtx
1150
convert_extracted_bit_field (rtx x, enum machine_mode mode,
1151
                             enum machine_mode tmode, bool unsignedp)
1152
{
1153
  if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1154
    return x;
1155
 
1156
  /* If the x mode is not a scalar integral, first convert to the
1157
     integer mode of that size and then access it as a floating-point
1158
     value via a SUBREG.  */
1159
  if (!SCALAR_INT_MODE_P (tmode))
1160
    {
1161
      enum machine_mode smode;
1162
 
1163
      smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1164
      x = convert_to_mode (smode, x, unsignedp);
1165
      x = force_reg (smode, x);
1166
      return gen_lowpart (tmode, x);
1167
    }
1168
 
1169
  return convert_to_mode (tmode, x, unsignedp);
1170
}
1171
 
1172
/* A subroutine of extract_bit_field, with the same arguments.
1173
   If FALLBACK_P is true, fall back to extract_fixed_bit_field
1174
   if we can find no other means of implementing the operation.
1175
   if FALLBACK_P is false, return NULL instead.  */
1176
 
1177
static rtx
1178
extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1179
                     unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1180
                     enum machine_mode mode, enum machine_mode tmode,
1181
                     bool fallback_p)
1182
{
1183
  unsigned int unit
1184
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1185
  unsigned HOST_WIDE_INT offset, bitpos;
1186
  rtx op0 = str_rtx;
1187
  enum machine_mode int_mode;
1188
  enum machine_mode ext_mode;
1189
  enum machine_mode mode1;
1190
  enum insn_code icode;
1191
  int byte_offset;
1192
 
1193
  if (tmode == VOIDmode)
1194
    tmode = mode;
1195
 
1196
  while (GET_CODE (op0) == SUBREG)
1197
    {
1198
      bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1199
      op0 = SUBREG_REG (op0);
1200
    }
1201
 
1202
  /* If we have an out-of-bounds access to a register, just return an
1203
     uninitialized register of the required mode.  This can occur if the
1204
     source code contains an out-of-bounds access to a small array.  */
1205
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1206
    return gen_reg_rtx (tmode);
1207
 
1208
  if (REG_P (op0)
1209
      && mode == GET_MODE (op0)
1210
      && bitnum == 0
1211
      && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1212
    {
1213
      /* We're trying to extract a full register from itself.  */
1214
      return op0;
1215
    }
1216
 
1217
  /* See if we can get a better vector mode before extracting.  */
1218
  if (VECTOR_MODE_P (GET_MODE (op0))
1219
      && !MEM_P (op0)
1220
      && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1221
    {
1222
      enum machine_mode new_mode;
1223
      int nunits = GET_MODE_NUNITS (GET_MODE (op0));
1224
 
1225
      if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1226
        new_mode = MIN_MODE_VECTOR_FLOAT;
1227
      else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1228
        new_mode = MIN_MODE_VECTOR_FRACT;
1229
      else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1230
        new_mode = MIN_MODE_VECTOR_UFRACT;
1231
      else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1232
        new_mode = MIN_MODE_VECTOR_ACCUM;
1233
      else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1234
        new_mode = MIN_MODE_VECTOR_UACCUM;
1235
      else
1236
        new_mode = MIN_MODE_VECTOR_INT;
1237
 
1238
      for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1239
        if (GET_MODE_NUNITS (new_mode) == nunits
1240
            && GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1241
            && targetm.vector_mode_supported_p (new_mode))
1242
          break;
1243
      if (new_mode != VOIDmode)
1244
        op0 = gen_lowpart (new_mode, op0);
1245
    }
1246
 
1247
  /* Use vec_extract patterns for extracting parts of vectors whenever
1248
     available.  */
1249
  if (VECTOR_MODE_P (GET_MODE (op0))
1250
      && !MEM_P (op0)
1251
      && (optab_handler (vec_extract_optab, GET_MODE (op0))->insn_code
1252
          != CODE_FOR_nothing)
1253
      && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1254
          == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1255
    {
1256
      enum machine_mode outermode = GET_MODE (op0);
1257
      enum machine_mode innermode = GET_MODE_INNER (outermode);
1258
      int icode = (int) optab_handler (vec_extract_optab, outermode)->insn_code;
1259
      unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1260
      rtx rtxpos = GEN_INT (pos);
1261
      rtx src = op0;
1262
      rtx dest = NULL, pat, seq;
1263
      enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1264
      enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1265
      enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1266
 
1267
      if (innermode == tmode || innermode == mode)
1268
        dest = target;
1269
 
1270
      if (!dest)
1271
        dest = gen_reg_rtx (innermode);
1272
 
1273
      start_sequence ();
1274
 
1275
      if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1276
        dest = copy_to_mode_reg (mode0, dest);
1277
 
1278
      if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1279
        src = copy_to_mode_reg (mode1, src);
1280
 
1281
      if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1282
        rtxpos = copy_to_mode_reg (mode1, rtxpos);
1283
 
1284
      /* We could handle this, but we should always be called with a pseudo
1285
         for our targets and all insns should take them as outputs.  */
1286
      gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1287
                  && (*insn_data[icode].operand[1].predicate) (src, mode1)
1288
                  && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1289
 
1290
      pat = GEN_FCN (icode) (dest, src, rtxpos);
1291
      seq = get_insns ();
1292
      end_sequence ();
1293
      if (pat)
1294
        {
1295
          emit_insn (seq);
1296
          emit_insn (pat);
1297
          if (mode0 != mode)
1298
            return gen_lowpart (tmode, dest);
1299
          return dest;
1300
        }
1301
    }
1302
 
1303
  /* Make sure we are playing with integral modes.  Pun with subregs
1304
     if we aren't.  */
1305
  {
1306
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1307
    if (imode != GET_MODE (op0))
1308
      {
1309
        if (MEM_P (op0))
1310
          op0 = adjust_address (op0, imode, 0);
1311
        else if (imode != BLKmode)
1312
          {
1313
            op0 = gen_lowpart (imode, op0);
1314
 
1315
            /* If we got a SUBREG, force it into a register since we
1316
               aren't going to be able to do another SUBREG on it.  */
1317
            if (GET_CODE (op0) == SUBREG)
1318
              op0 = force_reg (imode, op0);
1319
          }
1320
        else if (REG_P (op0))
1321
          {
1322
            rtx reg, subreg;
1323
            imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1324
                                            MODE_INT);
1325
            reg = gen_reg_rtx (imode);
1326
            subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1327
            emit_move_insn (subreg, op0);
1328
            op0 = reg;
1329
            bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1330
          }
1331
        else
1332
          {
1333
            rtx mem = assign_stack_temp (GET_MODE (op0),
1334
                                         GET_MODE_SIZE (GET_MODE (op0)), 0);
1335
            emit_move_insn (mem, op0);
1336
            op0 = adjust_address (mem, BLKmode, 0);
1337
          }
1338
      }
1339
  }
1340
 
1341
  /* We may be accessing data outside the field, which means
1342
     we can alias adjacent data.  */
1343
  if (MEM_P (op0))
1344
    {
1345
      op0 = shallow_copy_rtx (op0);
1346
      set_mem_alias_set (op0, 0);
1347
      set_mem_expr (op0, 0);
1348
    }
1349
 
1350
  /* Extraction of a full-word or multi-word value from a structure
1351
     in a register or aligned memory can be done with just a SUBREG.
1352
     A subword value in the least significant part of a register
1353
     can also be extracted with a SUBREG.  For this, we need the
1354
     byte offset of the value in op0.  */
1355
 
1356
  bitpos = bitnum % unit;
1357
  offset = bitnum / unit;
1358
  byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1359
 
1360
  /* If OP0 is a register, BITPOS must count within a word.
1361
     But as we have it, it counts within whatever size OP0 now has.
1362
     On a bigendian machine, these are not the same, so convert.  */
1363
  if (BYTES_BIG_ENDIAN
1364
      && !MEM_P (op0)
1365
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1366
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1367
 
1368
  /* ??? We currently assume TARGET is at least as big as BITSIZE.
1369
     If that's wrong, the solution is to test for it and set TARGET to 0
1370
     if needed.  */
1371
 
1372
  /* Only scalar integer modes can be converted via subregs.  There is an
1373
     additional problem for FP modes here in that they can have a precision
1374
     which is different from the size.  mode_for_size uses precision, but
1375
     we want a mode based on the size, so we must avoid calling it for FP
1376
     modes.  */
1377
  mode1  = (SCALAR_INT_MODE_P (tmode)
1378
            ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1379
            : mode);
1380
 
1381
  if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1382
        && bitpos % BITS_PER_WORD == 0)
1383
       || (mode1 != BLKmode
1384
           /* ??? The big endian test here is wrong.  This is correct
1385
              if the value is in a register, and if mode_for_size is not
1386
              the same mode as op0.  This causes us to get unnecessarily
1387
              inefficient code from the Thumb port when -mbig-endian.  */
1388
           && (BYTES_BIG_ENDIAN
1389
               ? bitpos + bitsize == BITS_PER_WORD
1390
               : bitpos == 0)))
1391
      && ((!MEM_P (op0)
1392
           && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode1),
1393
                                     GET_MODE_BITSIZE (GET_MODE (op0)))
1394
           && GET_MODE_SIZE (mode1) != 0
1395
           && byte_offset % GET_MODE_SIZE (mode1) == 0)
1396
          || (MEM_P (op0)
1397
              && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1398
                  || (offset * BITS_PER_UNIT % bitsize == 0
1399
                      && MEM_ALIGN (op0) % bitsize == 0)))))
1400
    {
1401
      if (MEM_P (op0))
1402
        op0 = adjust_address (op0, mode1, offset);
1403
      else if (mode1 != GET_MODE (op0))
1404
        {
1405
          rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1406
                                         byte_offset);
1407
          if (sub == NULL)
1408
            goto no_subreg_mode_swap;
1409
          op0 = sub;
1410
        }
1411
      if (mode1 != mode)
1412
        return convert_to_mode (tmode, op0, unsignedp);
1413
      return op0;
1414
    }
1415
 no_subreg_mode_swap:
1416
 
1417
  /* Handle fields bigger than a word.  */
1418
 
1419
  if (bitsize > BITS_PER_WORD)
1420
    {
1421
      /* Here we transfer the words of the field
1422
         in the order least significant first.
1423
         This is because the most significant word is the one which may
1424
         be less than full.  */
1425
 
1426
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1427
      unsigned int i;
1428
 
1429
      if (target == 0 || !REG_P (target))
1430
        target = gen_reg_rtx (mode);
1431
 
1432
      /* Indicate for flow that the entire target reg is being set.  */
1433
      emit_clobber (target);
1434
 
1435
      for (i = 0; i < nwords; i++)
1436
        {
1437
          /* If I is 0, use the low-order word in both field and target;
1438
             if I is 1, use the next to lowest word; and so on.  */
1439
          /* Word number in TARGET to use.  */
1440
          unsigned int wordnum
1441
            = (WORDS_BIG_ENDIAN
1442
               ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1443
               : i);
1444
          /* Offset from start of field in OP0.  */
1445
          unsigned int bit_offset = (WORDS_BIG_ENDIAN
1446
                                     ? MAX (0, ((int) bitsize - ((int) i + 1)
1447
                                                * (int) BITS_PER_WORD))
1448
                                     : (int) i * BITS_PER_WORD);
1449
          rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1450
          rtx result_part
1451
            = extract_bit_field (op0, MIN (BITS_PER_WORD,
1452
                                           bitsize - i * BITS_PER_WORD),
1453
                                 bitnum + bit_offset, 1, target_part, mode,
1454
                                 word_mode);
1455
 
1456
          gcc_assert (target_part);
1457
 
1458
          if (result_part != target_part)
1459
            emit_move_insn (target_part, result_part);
1460
        }
1461
 
1462
      if (unsignedp)
1463
        {
1464
          /* Unless we've filled TARGET, the upper regs in a multi-reg value
1465
             need to be zero'd out.  */
1466
          if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1467
            {
1468
              unsigned int i, total_words;
1469
 
1470
              total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1471
              for (i = nwords; i < total_words; i++)
1472
                emit_move_insn
1473
                  (operand_subword (target,
1474
                                    WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1475
                                    1, VOIDmode),
1476
                   const0_rtx);
1477
            }
1478
          return target;
1479
        }
1480
 
1481
      /* Signed bit field: sign-extend with two arithmetic shifts.  */
1482
      target = expand_shift (LSHIFT_EXPR, mode, target,
1483
                             build_int_cst (NULL_TREE,
1484
                                            GET_MODE_BITSIZE (mode) - bitsize),
1485
                             NULL_RTX, 0);
1486
      return expand_shift (RSHIFT_EXPR, mode, target,
1487
                           build_int_cst (NULL_TREE,
1488
                                          GET_MODE_BITSIZE (mode) - bitsize),
1489
                           NULL_RTX, 0);
1490
    }
1491
 
1492
  /* From here on we know the desired field is smaller than a word.  */
1493
 
1494
  /* Check if there is a correspondingly-sized integer field, so we can
1495
     safely extract it as one size of integer, if necessary; then
1496
     truncate or extend to the size that is wanted; then use SUBREGs or
1497
     convert_to_mode to get one of the modes we really wanted.  */
1498
 
1499
  int_mode = int_mode_for_mode (tmode);
1500
  if (int_mode == BLKmode)
1501
    int_mode = int_mode_for_mode (mode);
1502
  /* Should probably push op0 out to memory and then do a load.  */
1503
  gcc_assert (int_mode != BLKmode);
1504
 
1505
  /* OFFSET is the number of words or bytes (UNIT says which)
1506
     from STR_RTX to the first word or byte containing part of the field.  */
1507
  if (!MEM_P (op0))
1508
    {
1509
      if (offset != 0
1510
          || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1511
        {
1512
          if (!REG_P (op0))
1513
            op0 = copy_to_reg (op0);
1514
          op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1515
                                op0, (offset * UNITS_PER_WORD));
1516
        }
1517
      offset = 0;
1518
    }
1519
 
1520
  /* Now OFFSET is nonzero only for memory operands.  */
1521
  ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1522
  icode = unsignedp ? CODE_FOR_extzv : CODE_FOR_extv;
1523
  if (ext_mode != MAX_MACHINE_MODE
1524
      && bitsize > 0
1525
      && GET_MODE_BITSIZE (ext_mode) >= bitsize
1526
      /* If op0 is a register, we need it in EXT_MODE to make it
1527
         acceptable to the format of ext(z)v.  */
1528
      && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1529
      && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1530
           && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode)))
1531
      && check_predicate_volatile_ok (icode, 1, op0, GET_MODE (op0)))
1532
    {
1533
      unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1534
      rtx bitsize_rtx, bitpos_rtx;
1535
      rtx last = get_last_insn ();
1536
      rtx xop0 = op0;
1537
      rtx xtarget = target;
1538
      rtx xspec_target = target;
1539
      rtx xspec_target_subreg = 0;
1540
      rtx pat;
1541
 
1542
      /* If op0 is a register, we need it in EXT_MODE to make it
1543
         acceptable to the format of ext(z)v.  */
1544
      if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1545
        xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1546
      if (MEM_P (xop0))
1547
        /* Get ref to first byte containing part of the field.  */
1548
        xop0 = adjust_address (xop0, byte_mode, xoffset);
1549
 
1550
      /* On big-endian machines, we count bits from the most significant.
1551
         If the bit field insn does not, we must invert.  */
1552
      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1553
        xbitpos = unit - bitsize - xbitpos;
1554
 
1555
      /* Now convert from counting within UNIT to counting in EXT_MODE.  */
1556
      if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1557
        xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1558
 
1559
      unit = GET_MODE_BITSIZE (ext_mode);
1560
 
1561
      if (xtarget == 0)
1562
        xtarget = xspec_target = gen_reg_rtx (tmode);
1563
 
1564
      if (GET_MODE (xtarget) != ext_mode)
1565
        {
1566
          /* Don't use LHS paradoxical subreg if explicit truncation is needed
1567
             between the mode of the extraction (word_mode) and the target
1568
             mode.  Instead, create a temporary and use convert_move to set
1569
             the target.  */
1570
          if (REG_P (xtarget)
1571
              && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE (xtarget)),
1572
                                        GET_MODE_BITSIZE (ext_mode)))
1573
            {
1574
              xtarget = gen_lowpart (ext_mode, xtarget);
1575
              if (GET_MODE_SIZE (ext_mode)
1576
                  > GET_MODE_SIZE (GET_MODE (xspec_target)))
1577
                xspec_target_subreg = xtarget;
1578
            }
1579
          else
1580
            xtarget = gen_reg_rtx (ext_mode);
1581
        }
1582
 
1583
      /* If this machine's ext(z)v insists on a register target,
1584
         make sure we have one.  */
1585
      if (!insn_data[(int) icode].operand[0].predicate (xtarget, ext_mode))
1586
        xtarget = gen_reg_rtx (ext_mode);
1587
 
1588
      bitsize_rtx = GEN_INT (bitsize);
1589
      bitpos_rtx = GEN_INT (xbitpos);
1590
 
1591
      pat = (unsignedp
1592
             ? gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx)
1593
             : gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx));
1594
      if (pat)
1595
        {
1596
          emit_insn (pat);
1597
          if (xtarget == xspec_target)
1598
            return xtarget;
1599
          if (xtarget == xspec_target_subreg)
1600
            return xspec_target;
1601
          return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1602
        }
1603
      delete_insns_since (last);
1604
    }
1605
 
1606
  /* If OP0 is a memory, try copying it to a register and seeing if a
1607
     cheap register alternative is available.  */
1608
  if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1609
    {
1610
      enum machine_mode bestmode;
1611
 
1612
      /* Get the mode to use for inserting into this field.  If
1613
         OP0 is BLKmode, get the smallest mode consistent with the
1614
         alignment. If OP0 is a non-BLKmode object that is no
1615
         wider than EXT_MODE, use its mode. Otherwise, use the
1616
         smallest mode containing the field.  */
1617
 
1618
      if (GET_MODE (op0) == BLKmode
1619
          || (ext_mode != MAX_MACHINE_MODE
1620
              && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1621
        bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
1622
                                  (ext_mode == MAX_MACHINE_MODE
1623
                                   ? VOIDmode : ext_mode),
1624
                                  MEM_VOLATILE_P (op0));
1625
      else
1626
        bestmode = GET_MODE (op0);
1627
 
1628
      if (bestmode != VOIDmode
1629
          && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1630
               && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1631
        {
1632
          unsigned HOST_WIDE_INT xoffset, xbitpos;
1633
 
1634
          /* Compute the offset as a multiple of this unit,
1635
             counting in bytes.  */
1636
          unit = GET_MODE_BITSIZE (bestmode);
1637
          xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1638
          xbitpos = bitnum % unit;
1639
 
1640
          /* Make sure the register is big enough for the whole field.  */
1641
          if (xoffset * BITS_PER_UNIT + unit
1642
              >= offset * BITS_PER_UNIT + bitsize)
1643
            {
1644
              rtx last, result, xop0;
1645
 
1646
              last = get_last_insn ();
1647
 
1648
              /* Fetch it to a register in that size.  */
1649
              xop0 = adjust_address (op0, bestmode, xoffset);
1650
              xop0 = force_reg (bestmode, xop0);
1651
              result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1652
                                            unsignedp, target,
1653
                                            mode, tmode, false);
1654
              if (result)
1655
                return result;
1656
 
1657
              delete_insns_since (last);
1658
            }
1659
        }
1660
    }
1661
 
1662
  if (!fallback_p)
1663
    return NULL;
1664
 
1665
  target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1666
                                    bitpos, target, unsignedp);
1667
  return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1668
}
1669
 
1670
/* Generate code to extract a byte-field from STR_RTX
1671
   containing BITSIZE bits, starting at BITNUM,
1672
   and put it in TARGET if possible (if TARGET is nonzero).
1673
   Regardless of TARGET, we return the rtx for where the value is placed.
1674
 
1675
   STR_RTX is the structure containing the byte (a REG or MEM).
1676
   UNSIGNEDP is nonzero if this is an unsigned bit field.
1677
   MODE is the natural mode of the field value once extracted.
1678
   TMODE is the mode the caller would like the value to have;
1679
   but the value may be returned with type MODE instead.
1680
 
1681
   If a TARGET is specified and we can store in it at no extra cost,
1682
   we do so, and return TARGET.
1683
   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1684
   if they are equally easy.  */
1685
 
1686
rtx
1687
extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1688
                   unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1689
                   enum machine_mode mode, enum machine_mode tmode)
1690
{
1691
  return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1692
                              target, mode, tmode, true);
1693
}
1694
 
1695
/* Extract a bit field using shifts and boolean operations
1696
   Returns an rtx to represent the value.
1697
   OP0 addresses a register (word) or memory (byte).
1698
   BITPOS says which bit within the word or byte the bit field starts in.
1699
   OFFSET says how many bytes farther the bit field starts;
1700
    it is 0 if OP0 is a register.
1701
   BITSIZE says how many bits long the bit field is.
1702
    (If OP0 is a register, it may be narrower than a full word,
1703
     but BITPOS still counts within a full word,
1704
     which is significant on bigendian machines.)
1705
 
1706
   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1707
   If TARGET is nonzero, attempts to store the value there
1708
   and return TARGET, but this is not guaranteed.
1709
   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1710
 
1711
static rtx
1712
extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1713
                         unsigned HOST_WIDE_INT offset,
1714
                         unsigned HOST_WIDE_INT bitsize,
1715
                         unsigned HOST_WIDE_INT bitpos, rtx target,
1716
                         int unsignedp)
1717
{
1718
  unsigned int total_bits = BITS_PER_WORD;
1719
  enum machine_mode mode;
1720
 
1721
  if (GET_CODE (op0) == SUBREG || REG_P (op0))
1722
    {
1723
      /* Special treatment for a bit field split across two registers.  */
1724
      if (bitsize + bitpos > BITS_PER_WORD)
1725
        return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1726
    }
1727
  else
1728
    {
1729
      /* Get the proper mode to use for this field.  We want a mode that
1730
         includes the entire field.  If such a mode would be larger than
1731
         a word, we won't be doing the extraction the normal way.  */
1732
 
1733
      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1734
                            MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1735
 
1736
      if (mode == VOIDmode)
1737
        /* The only way this should occur is if the field spans word
1738
           boundaries.  */
1739
        return extract_split_bit_field (op0, bitsize,
1740
                                        bitpos + offset * BITS_PER_UNIT,
1741
                                        unsignedp);
1742
 
1743
      total_bits = GET_MODE_BITSIZE (mode);
1744
 
1745
      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1746
         be in the range 0 to total_bits-1, and put any excess bytes in
1747
         OFFSET.  */
1748
      if (bitpos >= total_bits)
1749
        {
1750
          offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1751
          bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1752
                     * BITS_PER_UNIT);
1753
        }
1754
 
1755
      /* Get ref to an aligned byte, halfword, or word containing the field.
1756
         Adjust BITPOS to be position within a word,
1757
         and OFFSET to be the offset of that word.
1758
         Then alter OP0 to refer to that word.  */
1759
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1760
      offset -= (offset % (total_bits / BITS_PER_UNIT));
1761
      op0 = adjust_address (op0, mode, offset);
1762
    }
1763
 
1764
  mode = GET_MODE (op0);
1765
 
1766
  if (BYTES_BIG_ENDIAN)
1767
    /* BITPOS is the distance between our msb and that of OP0.
1768
       Convert it to the distance from the lsb.  */
1769
    bitpos = total_bits - bitsize - bitpos;
1770
 
1771
  /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1772
     We have reduced the big-endian case to the little-endian case.  */
1773
 
1774
  if (unsignedp)
1775
    {
1776
      if (bitpos)
1777
        {
1778
          /* If the field does not already start at the lsb,
1779
             shift it so it does.  */
1780
          tree amount = build_int_cst (NULL_TREE, bitpos);
1781
          /* Maybe propagate the target for the shift.  */
1782
          /* But not if we will return it--could confuse integrate.c.  */
1783
          rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1784
          if (tmode != mode) subtarget = 0;
1785
          op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1786
        }
1787
      /* Convert the value to the desired mode.  */
1788
      if (mode != tmode)
1789
        op0 = convert_to_mode (tmode, op0, 1);
1790
 
1791
      /* Unless the msb of the field used to be the msb when we shifted,
1792
         mask out the upper bits.  */
1793
 
1794
      if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1795
        return expand_binop (GET_MODE (op0), and_optab, op0,
1796
                             mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1797
                             target, 1, OPTAB_LIB_WIDEN);
1798
      return op0;
1799
    }
1800
 
1801
  /* To extract a signed bit-field, first shift its msb to the msb of the word,
1802
     then arithmetic-shift its lsb to the lsb of the word.  */
1803
  op0 = force_reg (mode, op0);
1804
  if (mode != tmode)
1805
    target = 0;
1806
 
1807
  /* Find the narrowest integer mode that contains the field.  */
1808
 
1809
  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1810
       mode = GET_MODE_WIDER_MODE (mode))
1811
    if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1812
      {
1813
        op0 = convert_to_mode (mode, op0, 0);
1814
        break;
1815
      }
1816
 
1817
  if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1818
    {
1819
      tree amount
1820
        = build_int_cst (NULL_TREE,
1821
                         GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1822
      /* Maybe propagate the target for the shift.  */
1823
      rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1824
      op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1825
    }
1826
 
1827
  return expand_shift (RSHIFT_EXPR, mode, op0,
1828
                       build_int_cst (NULL_TREE,
1829
                                      GET_MODE_BITSIZE (mode) - bitsize),
1830
                       target, 0);
1831
}
1832
 
1833
/* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1834
   of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1835
   complement of that if COMPLEMENT.  The mask is truncated if
1836
   necessary to the width of mode MODE.  The mask is zero-extended if
1837
   BITSIZE+BITPOS is too small for MODE.  */
1838
 
1839
static rtx
1840
mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1841
{
1842
  HOST_WIDE_INT masklow, maskhigh;
1843
 
1844
  if (bitsize == 0)
1845
    masklow = 0;
1846
  else if (bitpos < HOST_BITS_PER_WIDE_INT)
1847
    masklow = (HOST_WIDE_INT) -1 << bitpos;
1848
  else
1849
    masklow = 0;
1850
 
1851
  if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1852
    masklow &= ((unsigned HOST_WIDE_INT) -1
1853
                >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1854
 
1855
  if (bitpos <= HOST_BITS_PER_WIDE_INT)
1856
    maskhigh = -1;
1857
  else
1858
    maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1859
 
1860
  if (bitsize == 0)
1861
    maskhigh = 0;
1862
  else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1863
    maskhigh &= ((unsigned HOST_WIDE_INT) -1
1864
                 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1865
  else
1866
    maskhigh = 0;
1867
 
1868
  if (complement)
1869
    {
1870
      maskhigh = ~maskhigh;
1871
      masklow = ~masklow;
1872
    }
1873
 
1874
  return immed_double_const (masklow, maskhigh, mode);
1875
}
1876
 
1877
/* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1878
   VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1879
 
1880
static rtx
1881
lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1882
{
1883
  unsigned HOST_WIDE_INT v = INTVAL (value);
1884
  HOST_WIDE_INT low, high;
1885
 
1886
  if (bitsize < HOST_BITS_PER_WIDE_INT)
1887
    v &= ~((HOST_WIDE_INT) -1 << bitsize);
1888
 
1889
  if (bitpos < HOST_BITS_PER_WIDE_INT)
1890
    {
1891
      low = v << bitpos;
1892
      high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1893
    }
1894
  else
1895
    {
1896
      low = 0;
1897
      high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1898
    }
1899
 
1900
  return immed_double_const (low, high, mode);
1901
}
1902
 
1903
/* Extract a bit field that is split across two words
1904
   and return an RTX for the result.
1905
 
1906
   OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1907
   BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1908
   UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1909
 
1910
static rtx
1911
extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1912
                         unsigned HOST_WIDE_INT bitpos, int unsignedp)
1913
{
1914
  unsigned int unit;
1915
  unsigned int bitsdone = 0;
1916
  rtx result = NULL_RTX;
1917
  int first = 1;
1918
 
1919
  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1920
     much at a time.  */
1921
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1922
    unit = BITS_PER_WORD;
1923
  else
1924
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1925
 
1926
  while (bitsdone < bitsize)
1927
    {
1928
      unsigned HOST_WIDE_INT thissize;
1929
      rtx part, word;
1930
      unsigned HOST_WIDE_INT thispos;
1931
      unsigned HOST_WIDE_INT offset;
1932
 
1933
      offset = (bitpos + bitsdone) / unit;
1934
      thispos = (bitpos + bitsdone) % unit;
1935
 
1936
      /* THISSIZE must not overrun a word boundary.  Otherwise,
1937
         extract_fixed_bit_field will call us again, and we will mutually
1938
         recurse forever.  */
1939
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1940
      thissize = MIN (thissize, unit - thispos);
1941
 
1942
      /* If OP0 is a register, then handle OFFSET here.
1943
 
1944
         When handling multiword bitfields, extract_bit_field may pass
1945
         down a word_mode SUBREG of a larger REG for a bitfield that actually
1946
         crosses a word boundary.  Thus, for a SUBREG, we must find
1947
         the current word starting from the base register.  */
1948
      if (GET_CODE (op0) == SUBREG)
1949
        {
1950
          int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1951
          word = operand_subword_force (SUBREG_REG (op0), word_offset,
1952
                                        GET_MODE (SUBREG_REG (op0)));
1953
          offset = 0;
1954
        }
1955
      else if (REG_P (op0))
1956
        {
1957
          word = operand_subword_force (op0, offset, GET_MODE (op0));
1958
          offset = 0;
1959
        }
1960
      else
1961
        word = op0;
1962
 
1963
      /* Extract the parts in bit-counting order,
1964
         whose meaning is determined by BYTES_PER_UNIT.
1965
         OFFSET is in UNITs, and UNIT is in bits.
1966
         extract_fixed_bit_field wants offset in bytes.  */
1967
      part = extract_fixed_bit_field (word_mode, word,
1968
                                      offset * unit / BITS_PER_UNIT,
1969
                                      thissize, thispos, 0, 1);
1970
      bitsdone += thissize;
1971
 
1972
      /* Shift this part into place for the result.  */
1973
      if (BYTES_BIG_ENDIAN)
1974
        {
1975
          if (bitsize != bitsdone)
1976
            part = expand_shift (LSHIFT_EXPR, word_mode, part,
1977
                                 build_int_cst (NULL_TREE, bitsize - bitsdone),
1978
                                 0, 1);
1979
        }
1980
      else
1981
        {
1982
          if (bitsdone != thissize)
1983
            part = expand_shift (LSHIFT_EXPR, word_mode, part,
1984
                                 build_int_cst (NULL_TREE,
1985
                                                bitsdone - thissize), 0, 1);
1986
        }
1987
 
1988
      if (first)
1989
        result = part;
1990
      else
1991
        /* Combine the parts with bitwise or.  This works
1992
           because we extracted each part as an unsigned bit field.  */
1993
        result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1994
                               OPTAB_LIB_WIDEN);
1995
 
1996
      first = 0;
1997
    }
1998
 
1999
  /* Unsigned bit field: we are done.  */
2000
  if (unsignedp)
2001
    return result;
2002
  /* Signed bit field: sign-extend with two arithmetic shifts.  */
2003
  result = expand_shift (LSHIFT_EXPR, word_mode, result,
2004
                         build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2005
                         NULL_RTX, 0);
2006
  return expand_shift (RSHIFT_EXPR, word_mode, result,
2007
                       build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2008
                       NULL_RTX, 0);
2009
}
2010
 
2011
/* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2012
   the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2013
   MODE, fill the upper bits with zeros.  Fail if the layout of either
2014
   mode is unknown (as for CC modes) or if the extraction would involve
2015
   unprofitable mode punning.  Return the value on success, otherwise
2016
   return null.
2017
 
2018
   This is different from gen_lowpart* in these respects:
2019
 
2020
     - the returned value must always be considered an rvalue
2021
 
2022
     - when MODE is wider than SRC_MODE, the extraction involves
2023
       a zero extension
2024
 
2025
     - when MODE is smaller than SRC_MODE, the extraction involves
2026
       a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2027
 
2028
   In other words, this routine performs a computation, whereas the
2029
   gen_lowpart* routines are conceptually lvalue or rvalue subreg
2030
   operations.  */
2031
 
2032
rtx
2033
extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2034
{
2035
  enum machine_mode int_mode, src_int_mode;
2036
 
2037
  if (mode == src_mode)
2038
    return src;
2039
 
2040
  if (CONSTANT_P (src))
2041
    {
2042
      /* simplify_gen_subreg can't be used here, as if simplify_subreg
2043
         fails, it will happily create (subreg (symbol_ref)) or similar
2044
         invalid SUBREGs.  */
2045
      unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2046
      rtx ret = simplify_subreg (mode, src, src_mode, byte);
2047
      if (ret)
2048
        return ret;
2049
 
2050
      if (GET_MODE (src) == VOIDmode
2051
          || !validate_subreg (mode, src_mode, src, byte))
2052
        return NULL_RTX;
2053
 
2054
      src = force_reg (GET_MODE (src), src);
2055
      return gen_rtx_SUBREG (mode, src, byte);
2056
    }
2057
 
2058
  if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2059
    return NULL_RTX;
2060
 
2061
  if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2062
      && MODES_TIEABLE_P (mode, src_mode))
2063
    {
2064
      rtx x = gen_lowpart_common (mode, src);
2065
      if (x)
2066
        return x;
2067
    }
2068
 
2069
  src_int_mode = int_mode_for_mode (src_mode);
2070
  int_mode = int_mode_for_mode (mode);
2071
  if (src_int_mode == BLKmode || int_mode == BLKmode)
2072
    return NULL_RTX;
2073
 
2074
  if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2075
    return NULL_RTX;
2076
  if (!MODES_TIEABLE_P (int_mode, mode))
2077
    return NULL_RTX;
2078
 
2079
  src = gen_lowpart (src_int_mode, src);
2080
  src = convert_modes (int_mode, src_int_mode, src, true);
2081
  src = gen_lowpart (mode, src);
2082
  return src;
2083
}
2084
 
2085
/* Add INC into TARGET.  */
2086
 
2087
void
2088
expand_inc (rtx target, rtx inc)
2089
{
2090
  rtx value = expand_binop (GET_MODE (target), add_optab,
2091
                            target, inc,
2092
                            target, 0, OPTAB_LIB_WIDEN);
2093
  if (value != target)
2094
    emit_move_insn (target, value);
2095
}
2096
 
2097
/* Subtract DEC from TARGET.  */
2098
 
2099
void
2100
expand_dec (rtx target, rtx dec)
2101
{
2102
  rtx value = expand_binop (GET_MODE (target), sub_optab,
2103
                            target, dec,
2104
                            target, 0, OPTAB_LIB_WIDEN);
2105
  if (value != target)
2106
    emit_move_insn (target, value);
2107
}
2108
 
2109
/* Output a shift instruction for expression code CODE,
2110
   with SHIFTED being the rtx for the value to shift,
2111
   and AMOUNT the tree for the amount to shift by.
2112
   Store the result in the rtx TARGET, if that is convenient.
2113
   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2114
   Return the rtx for where the value is.  */
2115
 
2116
rtx
2117
expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2118
              tree amount, rtx target, int unsignedp)
2119
{
2120
  rtx op1, temp = 0;
2121
  int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2122
  int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2123
  optab lshift_optab = ashl_optab;
2124
  optab rshift_arith_optab = ashr_optab;
2125
  optab rshift_uns_optab = lshr_optab;
2126
  optab lrotate_optab = rotl_optab;
2127
  optab rrotate_optab = rotr_optab;
2128
  enum machine_mode op1_mode;
2129
  int attempt;
2130
  bool speed = optimize_insn_for_speed_p ();
2131
 
2132
  op1 = expand_normal (amount);
2133
  op1_mode = GET_MODE (op1);
2134
 
2135
  /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2136
     shift amount is a vector, use the vector/vector shift patterns.  */
2137
  if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2138
    {
2139
      lshift_optab = vashl_optab;
2140
      rshift_arith_optab = vashr_optab;
2141
      rshift_uns_optab = vlshr_optab;
2142
      lrotate_optab = vrotl_optab;
2143
      rrotate_optab = vrotr_optab;
2144
    }
2145
 
2146
  /* Previously detected shift-counts computed by NEGATE_EXPR
2147
     and shifted in the other direction; but that does not work
2148
     on all machines.  */
2149
 
2150
  if (SHIFT_COUNT_TRUNCATED)
2151
    {
2152
      if (CONST_INT_P (op1)
2153
          && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2154
              (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2155
        op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2156
                       % GET_MODE_BITSIZE (mode));
2157
      else if (GET_CODE (op1) == SUBREG
2158
               && subreg_lowpart_p (op1)
2159
               && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2160
        op1 = SUBREG_REG (op1);
2161
    }
2162
 
2163
  if (op1 == const0_rtx)
2164
    return shifted;
2165
 
2166
  /* Check whether its cheaper to implement a left shift by a constant
2167
     bit count by a sequence of additions.  */
2168
  if (code == LSHIFT_EXPR
2169
      && CONST_INT_P (op1)
2170
      && INTVAL (op1) > 0
2171
      && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2172
      && INTVAL (op1) < MAX_BITS_PER_WORD
2173
      && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2174
      && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2175
    {
2176
      int i;
2177
      for (i = 0; i < INTVAL (op1); i++)
2178
        {
2179
          temp = force_reg (mode, shifted);
2180
          shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2181
                                  unsignedp, OPTAB_LIB_WIDEN);
2182
        }
2183
      return shifted;
2184
    }
2185
 
2186
  for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2187
    {
2188
      enum optab_methods methods;
2189
 
2190
      if (attempt == 0)
2191
        methods = OPTAB_DIRECT;
2192
      else if (attempt == 1)
2193
        methods = OPTAB_WIDEN;
2194
      else
2195
        methods = OPTAB_LIB_WIDEN;
2196
 
2197
      if (rotate)
2198
        {
2199
          /* Widening does not work for rotation.  */
2200
          if (methods == OPTAB_WIDEN)
2201
            continue;
2202
          else if (methods == OPTAB_LIB_WIDEN)
2203
            {
2204
              /* If we have been unable to open-code this by a rotation,
2205
                 do it as the IOR of two shifts.  I.e., to rotate A
2206
                 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2207
                 where C is the bitsize of A.
2208
 
2209
                 It is theoretically possible that the target machine might
2210
                 not be able to perform either shift and hence we would
2211
                 be making two libcalls rather than just the one for the
2212
                 shift (similarly if IOR could not be done).  We will allow
2213
                 this extremely unlikely lossage to avoid complicating the
2214
                 code below.  */
2215
 
2216
              rtx subtarget = target == shifted ? 0 : target;
2217
              tree new_amount, other_amount;
2218
              rtx temp1;
2219
              tree type = TREE_TYPE (amount);
2220
              if (GET_MODE (op1) != TYPE_MODE (type)
2221
                  && GET_MODE (op1) != VOIDmode)
2222
                op1 = convert_to_mode (TYPE_MODE (type), op1, 1);
2223
              new_amount = make_tree (type, op1);
2224
              other_amount
2225
                = fold_build2 (MINUS_EXPR, type,
2226
                               build_int_cst (type, GET_MODE_BITSIZE (mode)),
2227
                               new_amount);
2228
 
2229
              shifted = force_reg (mode, shifted);
2230
 
2231
              temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2232
                                   mode, shifted, new_amount, 0, 1);
2233
              temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2234
                                    mode, shifted, other_amount, subtarget, 1);
2235
              return expand_binop (mode, ior_optab, temp, temp1, target,
2236
                                   unsignedp, methods);
2237
            }
2238
 
2239
          temp = expand_binop (mode,
2240
                               left ? lrotate_optab : rrotate_optab,
2241
                               shifted, op1, target, unsignedp, methods);
2242
        }
2243
      else if (unsignedp)
2244
        temp = expand_binop (mode,
2245
                             left ? lshift_optab : rshift_uns_optab,
2246
                             shifted, op1, target, unsignedp, methods);
2247
 
2248
      /* Do arithmetic shifts.
2249
         Also, if we are going to widen the operand, we can just as well
2250
         use an arithmetic right-shift instead of a logical one.  */
2251
      if (temp == 0 && ! rotate
2252
          && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2253
        {
2254
          enum optab_methods methods1 = methods;
2255
 
2256
          /* If trying to widen a log shift to an arithmetic shift,
2257
             don't accept an arithmetic shift of the same size.  */
2258
          if (unsignedp)
2259
            methods1 = OPTAB_MUST_WIDEN;
2260
 
2261
          /* Arithmetic shift */
2262
 
2263
          temp = expand_binop (mode,
2264
                               left ? lshift_optab : rshift_arith_optab,
2265
                               shifted, op1, target, unsignedp, methods1);
2266
        }
2267
 
2268
      /* We used to try extzv here for logical right shifts, but that was
2269
         only useful for one machine, the VAX, and caused poor code
2270
         generation there for lshrdi3, so the code was deleted and a
2271
         define_expand for lshrsi3 was added to vax.md.  */
2272
    }
2273
 
2274
  gcc_assert (temp);
2275
  return temp;
2276
}
2277
 
2278
enum alg_code {
2279
  alg_unknown,
2280
  alg_zero,
2281
  alg_m, alg_shift,
2282
  alg_add_t_m2,
2283
  alg_sub_t_m2,
2284
  alg_add_factor,
2285
  alg_sub_factor,
2286
  alg_add_t2_m,
2287
  alg_sub_t2_m,
2288
  alg_impossible
2289
};
2290
 
2291
/* This structure holds the "cost" of a multiply sequence.  The
2292
   "cost" field holds the total rtx_cost of every operator in the
2293
   synthetic multiplication sequence, hence cost(a op b) is defined
2294
   as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2295
   The "latency" field holds the minimum possible latency of the
2296
   synthetic multiply, on a hypothetical infinitely parallel CPU.
2297
   This is the critical path, or the maximum height, of the expression
2298
   tree which is the sum of rtx_costs on the most expensive path from
2299
   any leaf to the root.  Hence latency(a op b) is defined as zero for
2300
   leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise.  */
2301
 
2302
struct mult_cost {
2303
  short cost;     /* Total rtx_cost of the multiplication sequence.  */
2304
  short latency;  /* The latency of the multiplication sequence.  */
2305
};
2306
 
2307
/* This macro is used to compare a pointer to a mult_cost against an
2308
   single integer "rtx_cost" value.  This is equivalent to the macro
2309
   CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}.  */
2310
#define MULT_COST_LESS(X,Y) ((X)->cost < (Y)    \
2311
                             || ((X)->cost == (Y) && (X)->latency < (Y)))
2312
 
2313
/* This macro is used to compare two pointers to mult_costs against
2314
   each other.  The macro returns true if X is cheaper than Y.
2315
   Currently, the cheaper of two mult_costs is the one with the
2316
   lower "cost".  If "cost"s are tied, the lower latency is cheaper.  */
2317
#define CHEAPER_MULT_COST(X,Y)  ((X)->cost < (Y)->cost          \
2318
                                 || ((X)->cost == (Y)->cost     \
2319
                                     && (X)->latency < (Y)->latency))
2320
 
2321
/* This structure records a sequence of operations.
2322
   `ops' is the number of operations recorded.
2323
   `cost' is their total cost.
2324
   The operations are stored in `op' and the corresponding
2325
   logarithms of the integer coefficients in `log'.
2326
 
2327
   These are the operations:
2328
   alg_zero             total := 0;
2329
   alg_m                total := multiplicand;
2330
   alg_shift            total := total * coeff
2331
   alg_add_t_m2         total := total + multiplicand * coeff;
2332
   alg_sub_t_m2         total := total - multiplicand * coeff;
2333
   alg_add_factor       total := total * coeff + total;
2334
   alg_sub_factor       total := total * coeff - total;
2335
   alg_add_t2_m         total := total * coeff + multiplicand;
2336
   alg_sub_t2_m         total := total * coeff - multiplicand;
2337
 
2338
   The first operand must be either alg_zero or alg_m.  */
2339
 
2340
struct algorithm
2341
{
2342
  struct mult_cost cost;
2343
  short ops;
2344
  /* The size of the OP and LOG fields are not directly related to the
2345
     word size, but the worst-case algorithms will be if we have few
2346
     consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2347
     In that case we will generate shift-by-2, add, shift-by-2, add,...,
2348
     in total wordsize operations.  */
2349
  enum alg_code op[MAX_BITS_PER_WORD];
2350
  char log[MAX_BITS_PER_WORD];
2351
};
2352
 
2353
/* The entry for our multiplication cache/hash table.  */
2354
struct alg_hash_entry {
2355
  /* The number we are multiplying by.  */
2356
  unsigned HOST_WIDE_INT t;
2357
 
2358
  /* The mode in which we are multiplying something by T.  */
2359
  enum machine_mode mode;
2360
 
2361
  /* The best multiplication algorithm for t.  */
2362
  enum alg_code alg;
2363
 
2364
  /* The cost of multiplication if ALG_CODE is not alg_impossible.
2365
     Otherwise, the cost within which multiplication by T is
2366
     impossible.  */
2367
  struct mult_cost cost;
2368
 
2369
  /* OPtimized for speed? */
2370
  bool speed;
2371
};
2372
 
2373
/* The number of cache/hash entries.  */
2374
#if HOST_BITS_PER_WIDE_INT == 64
2375
#define NUM_ALG_HASH_ENTRIES 1031
2376
#else
2377
#define NUM_ALG_HASH_ENTRIES 307
2378
#endif
2379
 
2380
/* Each entry of ALG_HASH caches alg_code for some integer.  This is
2381
   actually a hash table.  If we have a collision, that the older
2382
   entry is kicked out.  */
2383
static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2384
 
2385
/* Indicates the type of fixup needed after a constant multiplication.
2386
   BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2387
   the result should be negated, and ADD_VARIANT means that the
2388
   multiplicand should be added to the result.  */
2389
enum mult_variant {basic_variant, negate_variant, add_variant};
2390
 
2391
static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2392
                        const struct mult_cost *, enum machine_mode mode);
2393
static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2394
                                 struct algorithm *, enum mult_variant *, int);
2395
static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2396
                              const struct algorithm *, enum mult_variant);
2397
static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2398
                                                 int, rtx *, int *, int *);
2399
static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2400
static rtx extract_high_half (enum machine_mode, rtx);
2401
static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2402
static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2403
                                       int, int);
2404
/* Compute and return the best algorithm for multiplying by T.
2405
   The algorithm must cost less than cost_limit
2406
   If retval.cost >= COST_LIMIT, no algorithm was found and all
2407
   other field of the returned struct are undefined.
2408
   MODE is the machine mode of the multiplication.  */
2409
 
2410
static void
2411
synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2412
            const struct mult_cost *cost_limit, enum machine_mode mode)
2413
{
2414
  int m;
2415
  struct algorithm *alg_in, *best_alg;
2416
  struct mult_cost best_cost;
2417
  struct mult_cost new_limit;
2418
  int op_cost, op_latency;
2419
  unsigned HOST_WIDE_INT orig_t = t;
2420
  unsigned HOST_WIDE_INT q;
2421
  int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2422
  int hash_index;
2423
  bool cache_hit = false;
2424
  enum alg_code cache_alg = alg_zero;
2425
  bool speed = optimize_insn_for_speed_p ();
2426
 
2427
  /* Indicate that no algorithm is yet found.  If no algorithm
2428
     is found, this value will be returned and indicate failure.  */
2429
  alg_out->cost.cost = cost_limit->cost + 1;
2430
  alg_out->cost.latency = cost_limit->latency + 1;
2431
 
2432
  if (cost_limit->cost < 0
2433
      || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2434
    return;
2435
 
2436
  /* Restrict the bits of "t" to the multiplication's mode.  */
2437
  t &= GET_MODE_MASK (mode);
2438
 
2439
  /* t == 1 can be done in zero cost.  */
2440
  if (t == 1)
2441
    {
2442
      alg_out->ops = 1;
2443
      alg_out->cost.cost = 0;
2444
      alg_out->cost.latency = 0;
2445
      alg_out->op[0] = alg_m;
2446
      return;
2447
    }
2448
 
2449
  /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2450
     fail now.  */
2451
  if (t == 0)
2452
    {
2453
      if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2454
        return;
2455
      else
2456
        {
2457
          alg_out->ops = 1;
2458
          alg_out->cost.cost = zero_cost[speed];
2459
          alg_out->cost.latency = zero_cost[speed];
2460
          alg_out->op[0] = alg_zero;
2461
          return;
2462
        }
2463
    }
2464
 
2465
  /* We'll be needing a couple extra algorithm structures now.  */
2466
 
2467
  alg_in = XALLOCA (struct algorithm);
2468
  best_alg = XALLOCA (struct algorithm);
2469
  best_cost = *cost_limit;
2470
 
2471
  /* Compute the hash index.  */
2472
  hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2473
 
2474
  /* See if we already know what to do for T.  */
2475
  if (alg_hash[hash_index].t == t
2476
      && alg_hash[hash_index].mode == mode
2477
      && alg_hash[hash_index].mode == mode
2478
      && alg_hash[hash_index].speed == speed
2479
      && alg_hash[hash_index].alg != alg_unknown)
2480
    {
2481
      cache_alg = alg_hash[hash_index].alg;
2482
 
2483
      if (cache_alg == alg_impossible)
2484
        {
2485
          /* The cache tells us that it's impossible to synthesize
2486
             multiplication by T within alg_hash[hash_index].cost.  */
2487
          if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2488
            /* COST_LIMIT is at least as restrictive as the one
2489
               recorded in the hash table, in which case we have no
2490
               hope of synthesizing a multiplication.  Just
2491
               return.  */
2492
            return;
2493
 
2494
          /* If we get here, COST_LIMIT is less restrictive than the
2495
             one recorded in the hash table, so we may be able to
2496
             synthesize a multiplication.  Proceed as if we didn't
2497
             have the cache entry.  */
2498
        }
2499
      else
2500
        {
2501
          if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2502
            /* The cached algorithm shows that this multiplication
2503
               requires more cost than COST_LIMIT.  Just return.  This
2504
               way, we don't clobber this cache entry with
2505
               alg_impossible but retain useful information.  */
2506
            return;
2507
 
2508
          cache_hit = true;
2509
 
2510
          switch (cache_alg)
2511
            {
2512
            case alg_shift:
2513
              goto do_alg_shift;
2514
 
2515
            case alg_add_t_m2:
2516
            case alg_sub_t_m2:
2517
              goto do_alg_addsub_t_m2;
2518
 
2519
            case alg_add_factor:
2520
            case alg_sub_factor:
2521
              goto do_alg_addsub_factor;
2522
 
2523
            case alg_add_t2_m:
2524
              goto do_alg_add_t2_m;
2525
 
2526
            case alg_sub_t2_m:
2527
              goto do_alg_sub_t2_m;
2528
 
2529
            default:
2530
              gcc_unreachable ();
2531
            }
2532
        }
2533
    }
2534
 
2535
  /* If we have a group of zero bits at the low-order part of T, try
2536
     multiplying by the remaining bits and then doing a shift.  */
2537
 
2538
  if ((t & 1) == 0)
2539
    {
2540
    do_alg_shift:
2541
      m = floor_log2 (t & -t);  /* m = number of low zero bits */
2542
      if (m < maxm)
2543
        {
2544
          q = t >> m;
2545
          /* The function expand_shift will choose between a shift and
2546
             a sequence of additions, so the observed cost is given as
2547
             MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]).  */
2548
          op_cost = m * add_cost[speed][mode];
2549
          if (shift_cost[speed][mode][m] < op_cost)
2550
            op_cost = shift_cost[speed][mode][m];
2551
          new_limit.cost = best_cost.cost - op_cost;
2552
          new_limit.latency = best_cost.latency - op_cost;
2553
          synth_mult (alg_in, q, &new_limit, mode);
2554
 
2555
          alg_in->cost.cost += op_cost;
2556
          alg_in->cost.latency += op_cost;
2557
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2558
            {
2559
              struct algorithm *x;
2560
              best_cost = alg_in->cost;
2561
              x = alg_in, alg_in = best_alg, best_alg = x;
2562
              best_alg->log[best_alg->ops] = m;
2563
              best_alg->op[best_alg->ops] = alg_shift;
2564
            }
2565
 
2566
          /* See if treating ORIG_T as a signed number yields a better
2567
             sequence.  Try this sequence only for a negative ORIG_T
2568
             as it would be useless for a non-negative ORIG_T.  */
2569
          if ((HOST_WIDE_INT) orig_t < 0)
2570
            {
2571
              /* Shift ORIG_T as follows because a right shift of a
2572
                 negative-valued signed type is implementation
2573
                 defined.  */
2574
              q = ~(~orig_t >> m);
2575
              /* The function expand_shift will choose between a shift
2576
                 and a sequence of additions, so the observed cost is
2577
                 given as MIN (m * add_cost[speed][mode],
2578
                 shift_cost[speed][mode][m]).  */
2579
              op_cost = m * add_cost[speed][mode];
2580
              if (shift_cost[speed][mode][m] < op_cost)
2581
                op_cost = shift_cost[speed][mode][m];
2582
              new_limit.cost = best_cost.cost - op_cost;
2583
              new_limit.latency = best_cost.latency - op_cost;
2584
              synth_mult (alg_in, q, &new_limit, mode);
2585
 
2586
              alg_in->cost.cost += op_cost;
2587
              alg_in->cost.latency += op_cost;
2588
              if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2589
                {
2590
                  struct algorithm *x;
2591
                  best_cost = alg_in->cost;
2592
                  x = alg_in, alg_in = best_alg, best_alg = x;
2593
                  best_alg->log[best_alg->ops] = m;
2594
                  best_alg->op[best_alg->ops] = alg_shift;
2595
                }
2596
            }
2597
        }
2598
      if (cache_hit)
2599
        goto done;
2600
    }
2601
 
2602
  /* If we have an odd number, add or subtract one.  */
2603
  if ((t & 1) != 0)
2604
    {
2605
      unsigned HOST_WIDE_INT w;
2606
 
2607
    do_alg_addsub_t_m2:
2608
      for (w = 1; (w & t) != 0; w <<= 1)
2609
        ;
2610
      /* If T was -1, then W will be zero after the loop.  This is another
2611
         case where T ends with ...111.  Handling this with (T + 1) and
2612
         subtract 1 produces slightly better code and results in algorithm
2613
         selection much faster than treating it like the ...0111 case
2614
         below.  */
2615
      if (w == 0
2616
          || (w > 2
2617
              /* Reject the case where t is 3.
2618
                 Thus we prefer addition in that case.  */
2619
              && t != 3))
2620
        {
2621
          /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2622
 
2623
          op_cost = add_cost[speed][mode];
2624
          new_limit.cost = best_cost.cost - op_cost;
2625
          new_limit.latency = best_cost.latency - op_cost;
2626
          synth_mult (alg_in, t + 1, &new_limit, mode);
2627
 
2628
          alg_in->cost.cost += op_cost;
2629
          alg_in->cost.latency += op_cost;
2630
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2631
            {
2632
              struct algorithm *x;
2633
              best_cost = alg_in->cost;
2634
              x = alg_in, alg_in = best_alg, best_alg = x;
2635
              best_alg->log[best_alg->ops] = 0;
2636
              best_alg->op[best_alg->ops] = alg_sub_t_m2;
2637
            }
2638
        }
2639
      else
2640
        {
2641
          /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2642
 
2643
          op_cost = add_cost[speed][mode];
2644
          new_limit.cost = best_cost.cost - op_cost;
2645
          new_limit.latency = best_cost.latency - op_cost;
2646
          synth_mult (alg_in, t - 1, &new_limit, mode);
2647
 
2648
          alg_in->cost.cost += op_cost;
2649
          alg_in->cost.latency += op_cost;
2650
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2651
            {
2652
              struct algorithm *x;
2653
              best_cost = alg_in->cost;
2654
              x = alg_in, alg_in = best_alg, best_alg = x;
2655
              best_alg->log[best_alg->ops] = 0;
2656
              best_alg->op[best_alg->ops] = alg_add_t_m2;
2657
            }
2658
        }
2659
 
2660
      /* We may be able to calculate a * -7, a * -15, a * -31, etc
2661
         quickly with a - a * n for some appropriate constant n.  */
2662
      m = exact_log2 (-orig_t + 1);
2663
      if (m >= 0 && m < maxm)
2664
        {
2665
          op_cost = shiftsub1_cost[speed][mode][m];
2666
          new_limit.cost = best_cost.cost - op_cost;
2667
          new_limit.latency = best_cost.latency - op_cost;
2668
          synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
2669
 
2670
          alg_in->cost.cost += op_cost;
2671
          alg_in->cost.latency += op_cost;
2672
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2673
            {
2674
              struct algorithm *x;
2675
              best_cost = alg_in->cost;
2676
              x = alg_in, alg_in = best_alg, best_alg = x;
2677
              best_alg->log[best_alg->ops] = m;
2678
              best_alg->op[best_alg->ops] = alg_sub_t_m2;
2679
            }
2680
        }
2681
 
2682
      if (cache_hit)
2683
        goto done;
2684
    }
2685
 
2686
  /* Look for factors of t of the form
2687
     t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2688
     If we find such a factor, we can multiply by t using an algorithm that
2689
     multiplies by q, shift the result by m and add/subtract it to itself.
2690
 
2691
     We search for large factors first and loop down, even if large factors
2692
     are less probable than small; if we find a large factor we will find a
2693
     good sequence quickly, and therefore be able to prune (by decreasing
2694
     COST_LIMIT) the search.  */
2695
 
2696
 do_alg_addsub_factor:
2697
  for (m = floor_log2 (t - 1); m >= 2; m--)
2698
    {
2699
      unsigned HOST_WIDE_INT d;
2700
 
2701
      d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2702
      if (t % d == 0 && t > d && m < maxm
2703
          && (!cache_hit || cache_alg == alg_add_factor))
2704
        {
2705
          /* If the target has a cheap shift-and-add instruction use
2706
             that in preference to a shift insn followed by an add insn.
2707
             Assume that the shift-and-add is "atomic" with a latency
2708
             equal to its cost, otherwise assume that on superscalar
2709
             hardware the shift may be executed concurrently with the
2710
             earlier steps in the algorithm.  */
2711
          op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2712
          if (shiftadd_cost[speed][mode][m] < op_cost)
2713
            {
2714
              op_cost = shiftadd_cost[speed][mode][m];
2715
              op_latency = op_cost;
2716
            }
2717
          else
2718
            op_latency = add_cost[speed][mode];
2719
 
2720
          new_limit.cost = best_cost.cost - op_cost;
2721
          new_limit.latency = best_cost.latency - op_latency;
2722
          synth_mult (alg_in, t / d, &new_limit, mode);
2723
 
2724
          alg_in->cost.cost += op_cost;
2725
          alg_in->cost.latency += op_latency;
2726
          if (alg_in->cost.latency < op_cost)
2727
            alg_in->cost.latency = op_cost;
2728
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2729
            {
2730
              struct algorithm *x;
2731
              best_cost = alg_in->cost;
2732
              x = alg_in, alg_in = best_alg, best_alg = x;
2733
              best_alg->log[best_alg->ops] = m;
2734
              best_alg->op[best_alg->ops] = alg_add_factor;
2735
            }
2736
          /* Other factors will have been taken care of in the recursion.  */
2737
          break;
2738
        }
2739
 
2740
      d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2741
      if (t % d == 0 && t > d && m < maxm
2742
          && (!cache_hit || cache_alg == alg_sub_factor))
2743
        {
2744
          /* If the target has a cheap shift-and-subtract insn use
2745
             that in preference to a shift insn followed by a sub insn.
2746
             Assume that the shift-and-sub is "atomic" with a latency
2747
             equal to it's cost, otherwise assume that on superscalar
2748
             hardware the shift may be executed concurrently with the
2749
             earlier steps in the algorithm.  */
2750
          op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2751
          if (shiftsub0_cost[speed][mode][m] < op_cost)
2752
            {
2753
              op_cost = shiftsub0_cost[speed][mode][m];
2754
              op_latency = op_cost;
2755
            }
2756
          else
2757
            op_latency = add_cost[speed][mode];
2758
 
2759
          new_limit.cost = best_cost.cost - op_cost;
2760
          new_limit.latency = best_cost.latency - op_latency;
2761
          synth_mult (alg_in, t / d, &new_limit, mode);
2762
 
2763
          alg_in->cost.cost += op_cost;
2764
          alg_in->cost.latency += op_latency;
2765
          if (alg_in->cost.latency < op_cost)
2766
            alg_in->cost.latency = op_cost;
2767
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2768
            {
2769
              struct algorithm *x;
2770
              best_cost = alg_in->cost;
2771
              x = alg_in, alg_in = best_alg, best_alg = x;
2772
              best_alg->log[best_alg->ops] = m;
2773
              best_alg->op[best_alg->ops] = alg_sub_factor;
2774
            }
2775
          break;
2776
        }
2777
    }
2778
  if (cache_hit)
2779
    goto done;
2780
 
2781
  /* Try shift-and-add (load effective address) instructions,
2782
     i.e. do a*3, a*5, a*9.  */
2783
  if ((t & 1) != 0)
2784
    {
2785
    do_alg_add_t2_m:
2786
      q = t - 1;
2787
      q = q & -q;
2788
      m = exact_log2 (q);
2789
      if (m >= 0 && m < maxm)
2790
        {
2791
          op_cost = shiftadd_cost[speed][mode][m];
2792
          new_limit.cost = best_cost.cost - op_cost;
2793
          new_limit.latency = best_cost.latency - op_cost;
2794
          synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2795
 
2796
          alg_in->cost.cost += op_cost;
2797
          alg_in->cost.latency += op_cost;
2798
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2799
            {
2800
              struct algorithm *x;
2801
              best_cost = alg_in->cost;
2802
              x = alg_in, alg_in = best_alg, best_alg = x;
2803
              best_alg->log[best_alg->ops] = m;
2804
              best_alg->op[best_alg->ops] = alg_add_t2_m;
2805
            }
2806
        }
2807
      if (cache_hit)
2808
        goto done;
2809
 
2810
    do_alg_sub_t2_m:
2811
      q = t + 1;
2812
      q = q & -q;
2813
      m = exact_log2 (q);
2814
      if (m >= 0 && m < maxm)
2815
        {
2816
          op_cost = shiftsub0_cost[speed][mode][m];
2817
          new_limit.cost = best_cost.cost - op_cost;
2818
          new_limit.latency = best_cost.latency - op_cost;
2819
          synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2820
 
2821
          alg_in->cost.cost += op_cost;
2822
          alg_in->cost.latency += op_cost;
2823
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2824
            {
2825
              struct algorithm *x;
2826
              best_cost = alg_in->cost;
2827
              x = alg_in, alg_in = best_alg, best_alg = x;
2828
              best_alg->log[best_alg->ops] = m;
2829
              best_alg->op[best_alg->ops] = alg_sub_t2_m;
2830
            }
2831
        }
2832
      if (cache_hit)
2833
        goto done;
2834
    }
2835
 
2836
 done:
2837
  /* If best_cost has not decreased, we have not found any algorithm.  */
2838
  if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2839
    {
2840
      /* We failed to find an algorithm.  Record alg_impossible for
2841
         this case (that is, <T, MODE, COST_LIMIT>) so that next time
2842
         we are asked to find an algorithm for T within the same or
2843
         lower COST_LIMIT, we can immediately return to the
2844
         caller.  */
2845
      alg_hash[hash_index].t = t;
2846
      alg_hash[hash_index].mode = mode;
2847
      alg_hash[hash_index].speed = speed;
2848
      alg_hash[hash_index].alg = alg_impossible;
2849
      alg_hash[hash_index].cost = *cost_limit;
2850
      return;
2851
    }
2852
 
2853
  /* Cache the result.  */
2854
  if (!cache_hit)
2855
    {
2856
      alg_hash[hash_index].t = t;
2857
      alg_hash[hash_index].mode = mode;
2858
      alg_hash[hash_index].speed = speed;
2859
      alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2860
      alg_hash[hash_index].cost.cost = best_cost.cost;
2861
      alg_hash[hash_index].cost.latency = best_cost.latency;
2862
    }
2863
 
2864
  /* If we are getting a too long sequence for `struct algorithm'
2865
     to record, make this search fail.  */
2866
  if (best_alg->ops == MAX_BITS_PER_WORD)
2867
    return;
2868
 
2869
  /* Copy the algorithm from temporary space to the space at alg_out.
2870
     We avoid using structure assignment because the majority of
2871
     best_alg is normally undefined, and this is a critical function.  */
2872
  alg_out->ops = best_alg->ops + 1;
2873
  alg_out->cost = best_cost;
2874
  memcpy (alg_out->op, best_alg->op,
2875
          alg_out->ops * sizeof *alg_out->op);
2876
  memcpy (alg_out->log, best_alg->log,
2877
          alg_out->ops * sizeof *alg_out->log);
2878
}
2879
 
2880
/* Find the cheapest way of multiplying a value of mode MODE by VAL.
2881
   Try three variations:
2882
 
2883
       - a shift/add sequence based on VAL itself
2884
       - a shift/add sequence based on -VAL, followed by a negation
2885
       - a shift/add sequence based on VAL - 1, followed by an addition.
2886
 
2887
   Return true if the cheapest of these cost less than MULT_COST,
2888
   describing the algorithm in *ALG and final fixup in *VARIANT.  */
2889
 
2890
static bool
2891
choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2892
                     struct algorithm *alg, enum mult_variant *variant,
2893
                     int mult_cost)
2894
{
2895
  struct algorithm alg2;
2896
  struct mult_cost limit;
2897
  int op_cost;
2898
  bool speed = optimize_insn_for_speed_p ();
2899
 
2900
  /* Fail quickly for impossible bounds.  */
2901
  if (mult_cost < 0)
2902
    return false;
2903
 
2904
  /* Ensure that mult_cost provides a reasonable upper bound.
2905
     Any constant multiplication can be performed with less
2906
     than 2 * bits additions.  */
2907
  op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2908
  if (mult_cost > op_cost)
2909
    mult_cost = op_cost;
2910
 
2911
  *variant = basic_variant;
2912
  limit.cost = mult_cost;
2913
  limit.latency = mult_cost;
2914
  synth_mult (alg, val, &limit, mode);
2915
 
2916
  /* This works only if the inverted value actually fits in an
2917
     `unsigned int' */
2918
  if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2919
    {
2920
      op_cost = neg_cost[speed][mode];
2921
      if (MULT_COST_LESS (&alg->cost, mult_cost))
2922
        {
2923
          limit.cost = alg->cost.cost - op_cost;
2924
          limit.latency = alg->cost.latency - op_cost;
2925
        }
2926
      else
2927
        {
2928
          limit.cost = mult_cost - op_cost;
2929
          limit.latency = mult_cost - op_cost;
2930
        }
2931
 
2932
      synth_mult (&alg2, -val, &limit, mode);
2933
      alg2.cost.cost += op_cost;
2934
      alg2.cost.latency += op_cost;
2935
      if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2936
        *alg = alg2, *variant = negate_variant;
2937
    }
2938
 
2939
  /* This proves very useful for division-by-constant.  */
2940
  op_cost = add_cost[speed][mode];
2941
  if (MULT_COST_LESS (&alg->cost, mult_cost))
2942
    {
2943
      limit.cost = alg->cost.cost - op_cost;
2944
      limit.latency = alg->cost.latency - op_cost;
2945
    }
2946
  else
2947
    {
2948
      limit.cost = mult_cost - op_cost;
2949
      limit.latency = mult_cost - op_cost;
2950
    }
2951
 
2952
  synth_mult (&alg2, val - 1, &limit, mode);
2953
  alg2.cost.cost += op_cost;
2954
  alg2.cost.latency += op_cost;
2955
  if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2956
    *alg = alg2, *variant = add_variant;
2957
 
2958
  return MULT_COST_LESS (&alg->cost, mult_cost);
2959
}
2960
 
2961
/* A subroutine of expand_mult, used for constant multiplications.
2962
   Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2963
   convenient.  Use the shift/add sequence described by ALG and apply
2964
   the final fixup specified by VARIANT.  */
2965
 
2966
static rtx
2967
expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2968
                   rtx target, const struct algorithm *alg,
2969
                   enum mult_variant variant)
2970
{
2971
  HOST_WIDE_INT val_so_far;
2972
  rtx insn, accum, tem;
2973
  int opno;
2974
  enum machine_mode nmode;
2975
 
2976
  /* Avoid referencing memory over and over and invalid sharing
2977
     on SUBREGs.  */
2978
  op0 = force_reg (mode, op0);
2979
 
2980
  /* ACCUM starts out either as OP0 or as a zero, depending on
2981
     the first operation.  */
2982
 
2983
  if (alg->op[0] == alg_zero)
2984
    {
2985
      accum = copy_to_mode_reg (mode, const0_rtx);
2986
      val_so_far = 0;
2987
    }
2988
  else if (alg->op[0] == alg_m)
2989
    {
2990
      accum = copy_to_mode_reg (mode, op0);
2991
      val_so_far = 1;
2992
    }
2993
  else
2994
    gcc_unreachable ();
2995
 
2996
  for (opno = 1; opno < alg->ops; opno++)
2997
    {
2998
      int log = alg->log[opno];
2999
      rtx shift_subtarget = optimize ? 0 : accum;
3000
      rtx add_target
3001
        = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3002
           && !optimize)
3003
          ? target : 0;
3004
      rtx accum_target = optimize ? 0 : accum;
3005
 
3006
      switch (alg->op[opno])
3007
        {
3008
        case alg_shift:
3009 378 julius
          tem = expand_shift (LSHIFT_EXPR, mode, accum,
3010
                              build_int_cst (NULL_TREE, log),
3011
                              NULL_RTX, 0);
3012
          /* REG_EQUAL note will be attached to the following insn.  */
3013
          emit_move_insn (accum, tem);
3014 280 jeremybenn
          val_so_far <<= log;
3015
          break;
3016
 
3017
        case alg_add_t_m2:
3018
          tem = expand_shift (LSHIFT_EXPR, mode, op0,
3019
                              build_int_cst (NULL_TREE, log),
3020
                              NULL_RTX, 0);
3021
          accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3022
                                 add_target ? add_target : accum_target);
3023
          val_so_far += (HOST_WIDE_INT) 1 << log;
3024
          break;
3025
 
3026
        case alg_sub_t_m2:
3027
          tem = expand_shift (LSHIFT_EXPR, mode, op0,
3028
                              build_int_cst (NULL_TREE, log),
3029
                              NULL_RTX, 0);
3030
          accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3031
                                 add_target ? add_target : accum_target);
3032
          val_so_far -= (HOST_WIDE_INT) 1 << log;
3033
          break;
3034
 
3035
        case alg_add_t2_m:
3036
          accum = expand_shift (LSHIFT_EXPR, mode, accum,
3037
                                build_int_cst (NULL_TREE, log),
3038
                                shift_subtarget,
3039
                                0);
3040
          accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3041
                                 add_target ? add_target : accum_target);
3042
          val_so_far = (val_so_far << log) + 1;
3043
          break;
3044
 
3045
        case alg_sub_t2_m:
3046
          accum = expand_shift (LSHIFT_EXPR, mode, accum,
3047
                                build_int_cst (NULL_TREE, log),
3048
                                shift_subtarget, 0);
3049
          accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3050
                                 add_target ? add_target : accum_target);
3051
          val_so_far = (val_so_far << log) - 1;
3052
          break;
3053
 
3054
        case alg_add_factor:
3055
          tem = expand_shift (LSHIFT_EXPR, mode, accum,
3056
                              build_int_cst (NULL_TREE, log),
3057
                              NULL_RTX, 0);
3058
          accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3059
                                 add_target ? add_target : accum_target);
3060
          val_so_far += val_so_far << log;
3061
          break;
3062
 
3063
        case alg_sub_factor:
3064
          tem = expand_shift (LSHIFT_EXPR, mode, accum,
3065
                              build_int_cst (NULL_TREE, log),
3066
                              NULL_RTX, 0);
3067
          accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3068
                                 (add_target
3069
                                  ? add_target : (optimize ? 0 : tem)));
3070
          val_so_far = (val_so_far << log) - val_so_far;
3071
          break;
3072
 
3073
        default:
3074
          gcc_unreachable ();
3075
        }
3076
 
3077
      /* Write a REG_EQUAL note on the last insn so that we can cse
3078
         multiplication sequences.  Note that if ACCUM is a SUBREG,
3079
         we've set the inner register and must properly indicate
3080
         that.  */
3081
 
3082
      tem = op0, nmode = mode;
3083
      if (GET_CODE (accum) == SUBREG)
3084
        {
3085
          nmode = GET_MODE (SUBREG_REG (accum));
3086
          tem = gen_lowpart (nmode, op0);
3087
        }
3088
 
3089
      insn = get_last_insn ();
3090
      set_unique_reg_note (insn, REG_EQUAL,
3091
                           gen_rtx_MULT (nmode, tem,
3092
                                         GEN_INT (val_so_far)));
3093
    }
3094
 
3095
  if (variant == negate_variant)
3096
    {
3097
      val_so_far = -val_so_far;
3098
      accum = expand_unop (mode, neg_optab, accum, target, 0);
3099
    }
3100
  else if (variant == add_variant)
3101
    {
3102
      val_so_far = val_so_far + 1;
3103
      accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3104
    }
3105
 
3106
  /* Compare only the bits of val and val_so_far that are significant
3107
     in the result mode, to avoid sign-/zero-extension confusion.  */
3108
  val &= GET_MODE_MASK (mode);
3109
  val_so_far &= GET_MODE_MASK (mode);
3110
  gcc_assert (val == val_so_far);
3111
 
3112
  return accum;
3113
}
3114
 
3115
/* Perform a multiplication and return an rtx for the result.
3116
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3117
   TARGET is a suggestion for where to store the result (an rtx).
3118
 
3119
   We check specially for a constant integer as OP1.
3120
   If you want this check for OP0 as well, then before calling
3121
   you should swap the two operands if OP0 would be constant.  */
3122
 
3123
rtx
3124
expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3125
             int unsignedp)
3126
{
3127
  enum mult_variant variant;
3128
  struct algorithm algorithm;
3129
  int max_cost;
3130
  bool speed = optimize_insn_for_speed_p ();
3131
 
3132
  /* Handling const0_rtx here allows us to use zero as a rogue value for
3133
     coeff below.  */
3134
  if (op1 == const0_rtx)
3135
    return const0_rtx;
3136
  if (op1 == const1_rtx)
3137
    return op0;
3138
  if (op1 == constm1_rtx)
3139
    return expand_unop (mode,
3140
                        GET_MODE_CLASS (mode) == MODE_INT
3141
                        && !unsignedp && flag_trapv
3142
                        ? negv_optab : neg_optab,
3143
                        op0, target, 0);
3144
 
3145
  /* These are the operations that are potentially turned into a sequence
3146
     of shifts and additions.  */
3147
  if (SCALAR_INT_MODE_P (mode)
3148
      && (unsignedp || !flag_trapv))
3149
    {
3150
      HOST_WIDE_INT coeff = 0;
3151
      rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3152
 
3153
      /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3154
         less than or equal in size to `unsigned int' this doesn't matter.
3155
         If the mode is larger than `unsigned int', then synth_mult works
3156
         only if the constant value exactly fits in an `unsigned int' without
3157
         any truncation.  This means that multiplying by negative values does
3158
         not work; results are off by 2^32 on a 32 bit machine.  */
3159
 
3160
      if (CONST_INT_P (op1))
3161
        {
3162
          /* Attempt to handle multiplication of DImode values by negative
3163
             coefficients, by performing the multiplication by a positive
3164
             multiplier and then inverting the result.  */
3165
          if (INTVAL (op1) < 0
3166
              && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3167
            {
3168
              /* Its safe to use -INTVAL (op1) even for INT_MIN, as the
3169
                 result is interpreted as an unsigned coefficient.
3170
                 Exclude cost of op0 from max_cost to match the cost
3171
                 calculation of the synth_mult.  */
3172
              max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed)
3173
                         - neg_cost[speed][mode];
3174
              if (max_cost > 0
3175
                  && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3176
                                          &variant, max_cost))
3177
                {
3178
                  rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3179
                                                NULL_RTX, &algorithm,
3180
                                                variant);
3181
                  return expand_unop (mode, neg_optab, temp, target, 0);
3182
                }
3183
            }
3184
          else coeff = INTVAL (op1);
3185
        }
3186
      else if (GET_CODE (op1) == CONST_DOUBLE)
3187
        {
3188
          /* If we are multiplying in DImode, it may still be a win
3189
             to try to work with shifts and adds.  */
3190
          if (CONST_DOUBLE_HIGH (op1) == 0
3191
              && CONST_DOUBLE_LOW (op1) > 0)
3192
            coeff = CONST_DOUBLE_LOW (op1);
3193
          else if (CONST_DOUBLE_LOW (op1) == 0
3194
                   && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3195
            {
3196
              int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3197
                          + HOST_BITS_PER_WIDE_INT;
3198
              return expand_shift (LSHIFT_EXPR, mode, op0,
3199
                                   build_int_cst (NULL_TREE, shift),
3200
                                   target, unsignedp);
3201
            }
3202
        }
3203
 
3204
      /* We used to test optimize here, on the grounds that it's better to
3205
         produce a smaller program when -O is not used.  But this causes
3206
         such a terrible slowdown sometimes that it seems better to always
3207
         use synth_mult.  */
3208
      if (coeff != 0)
3209
        {
3210
          /* Special case powers of two.  */
3211
          if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3212
            return expand_shift (LSHIFT_EXPR, mode, op0,
3213
                                 build_int_cst (NULL_TREE, floor_log2 (coeff)),
3214
                                 target, unsignedp);
3215
 
3216
          /* Exclude cost of op0 from max_cost to match the cost
3217
             calculation of the synth_mult.  */
3218
          max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed);
3219
          if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3220
                                   max_cost))
3221
            return expand_mult_const (mode, op0, coeff, target,
3222
                                      &algorithm, variant);
3223
        }
3224
    }
3225
 
3226
  if (GET_CODE (op0) == CONST_DOUBLE)
3227
    {
3228
      rtx temp = op0;
3229
      op0 = op1;
3230
      op1 = temp;
3231
    }
3232
 
3233
  /* Expand x*2.0 as x+x.  */
3234
  if (GET_CODE (op1) == CONST_DOUBLE
3235
      && SCALAR_FLOAT_MODE_P (mode))
3236
    {
3237
      REAL_VALUE_TYPE d;
3238
      REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3239
 
3240
      if (REAL_VALUES_EQUAL (d, dconst2))
3241
        {
3242
          op0 = force_reg (GET_MODE (op0), op0);
3243
          return expand_binop (mode, add_optab, op0, op0,
3244
                               target, unsignedp, OPTAB_LIB_WIDEN);
3245
        }
3246
    }
3247
 
3248
  /* This used to use umul_optab if unsigned, but for non-widening multiply
3249
     there is no difference between signed and unsigned.  */
3250
  op0 = expand_binop (mode,
3251
                      ! unsignedp
3252
                      && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3253
                      ? smulv_optab : smul_optab,
3254
                      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3255
  gcc_assert (op0);
3256
  return op0;
3257
}
3258
 
3259
/* Return the smallest n such that 2**n >= X.  */
3260
 
3261
int
3262
ceil_log2 (unsigned HOST_WIDE_INT x)
3263
{
3264
  return floor_log2 (x - 1) + 1;
3265
}
3266
 
3267
/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3268
   replace division by D, and put the least significant N bits of the result
3269
   in *MULTIPLIER_PTR and return the most significant bit.
3270
 
3271
   The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3272
   needed precision is in PRECISION (should be <= N).
3273
 
3274
   PRECISION should be as small as possible so this function can choose
3275
   multiplier more freely.
3276
 
3277
   The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3278
   is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3279
 
3280
   Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3281
   where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3282
 
3283
static
3284
unsigned HOST_WIDE_INT
3285
choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3286
                   rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3287
{
3288
  HOST_WIDE_INT mhigh_hi, mlow_hi;
3289
  unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3290
  int lgup, post_shift;
3291
  int pow, pow2;
3292
  unsigned HOST_WIDE_INT nl, dummy1;
3293
  HOST_WIDE_INT nh, dummy2;
3294
 
3295
  /* lgup = ceil(log2(divisor)); */
3296
  lgup = ceil_log2 (d);
3297
 
3298
  gcc_assert (lgup <= n);
3299
 
3300
  pow = n + lgup;
3301
  pow2 = n + lgup - precision;
3302
 
3303
  /* We could handle this with some effort, but this case is much
3304
     better handled directly with a scc insn, so rely on caller using
3305
     that.  */
3306
  gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3307
 
3308
  /* mlow = 2^(N + lgup)/d */
3309
 if (pow >= HOST_BITS_PER_WIDE_INT)
3310
    {
3311
      nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3312
      nl = 0;
3313
    }
3314
  else
3315
    {
3316
      nh = 0;
3317
      nl = (unsigned HOST_WIDE_INT) 1 << pow;
3318
    }
3319
  div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3320
                        &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3321
 
3322
  /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
3323
  if (pow2 >= HOST_BITS_PER_WIDE_INT)
3324
    nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3325
  else
3326
    nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3327
  div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3328
                        &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3329
 
3330
  gcc_assert (!mhigh_hi || nh - d < d);
3331
  gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3332
  /* Assert that mlow < mhigh.  */
3333
  gcc_assert (mlow_hi < mhigh_hi
3334
              || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3335
 
3336
  /* If precision == N, then mlow, mhigh exceed 2^N
3337
     (but they do not exceed 2^(N+1)).  */
3338
 
3339
  /* Reduce to lowest terms.  */
3340
  for (post_shift = lgup; post_shift > 0; post_shift--)
3341
    {
3342
      unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3343
      unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3344
      if (ml_lo >= mh_lo)
3345
        break;
3346
 
3347
      mlow_hi = 0;
3348
      mlow_lo = ml_lo;
3349
      mhigh_hi = 0;
3350
      mhigh_lo = mh_lo;
3351
    }
3352
 
3353
  *post_shift_ptr = post_shift;
3354
  *lgup_ptr = lgup;
3355
  if (n < HOST_BITS_PER_WIDE_INT)
3356
    {
3357
      unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3358
      *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3359
      return mhigh_lo >= mask;
3360
    }
3361
  else
3362
    {
3363
      *multiplier_ptr = GEN_INT (mhigh_lo);
3364
      return mhigh_hi;
3365
    }
3366
}
3367
 
3368
/* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3369
   congruent to 1 (mod 2**N).  */
3370
 
3371
static unsigned HOST_WIDE_INT
3372
invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3373
{
3374
  /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3375
 
3376
  /* The algorithm notes that the choice y = x satisfies
3377
     x*y == 1 mod 2^3, since x is assumed odd.
3378
     Each iteration doubles the number of bits of significance in y.  */
3379
 
3380
  unsigned HOST_WIDE_INT mask;
3381
  unsigned HOST_WIDE_INT y = x;
3382
  int nbit = 3;
3383
 
3384
  mask = (n == HOST_BITS_PER_WIDE_INT
3385
          ? ~(unsigned HOST_WIDE_INT) 0
3386
          : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3387
 
3388
  while (nbit < n)
3389
    {
3390
      y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3391
      nbit *= 2;
3392
    }
3393
  return y;
3394
}
3395
 
3396
/* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3397
   flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3398
   product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3399
   to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3400
   become signed.
3401
 
3402
   The result is put in TARGET if that is convenient.
3403
 
3404
   MODE is the mode of operation.  */
3405
 
3406
rtx
3407
expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3408
                             rtx op1, rtx target, int unsignedp)
3409
{
3410
  rtx tem;
3411
  enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3412
 
3413
  tem = expand_shift (RSHIFT_EXPR, mode, op0,
3414
                      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3415
                      NULL_RTX, 0);
3416
  tem = expand_and (mode, tem, op1, NULL_RTX);
3417
  adj_operand
3418
    = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3419
                     adj_operand);
3420
 
3421
  tem = expand_shift (RSHIFT_EXPR, mode, op1,
3422
                      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3423
                      NULL_RTX, 0);
3424
  tem = expand_and (mode, tem, op0, NULL_RTX);
3425
  target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3426
                          target);
3427
 
3428
  return target;
3429
}
3430
 
3431
/* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
3432
 
3433
static rtx
3434
extract_high_half (enum machine_mode mode, rtx op)
3435
{
3436
  enum machine_mode wider_mode;
3437
 
3438
  if (mode == word_mode)
3439
    return gen_highpart (mode, op);
3440
 
3441
  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3442
 
3443
  wider_mode = GET_MODE_WIDER_MODE (mode);
3444
  op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3445
                     build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3446
  return convert_modes (mode, wider_mode, op, 0);
3447
}
3448
 
3449
/* Like expand_mult_highpart, but only consider using a multiplication
3450
   optab.  OP1 is an rtx for the constant operand.  */
3451
 
3452
static rtx
3453
expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3454
                            rtx target, int unsignedp, int max_cost)
3455
{
3456
  rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3457
  enum machine_mode wider_mode;
3458
  optab moptab;
3459
  rtx tem;
3460
  int size;
3461
  bool speed = optimize_insn_for_speed_p ();
3462
 
3463
  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3464
 
3465
  wider_mode = GET_MODE_WIDER_MODE (mode);
3466
  size = GET_MODE_BITSIZE (mode);
3467
 
3468
  /* Firstly, try using a multiplication insn that only generates the needed
3469
     high part of the product, and in the sign flavor of unsignedp.  */
3470
  if (mul_highpart_cost[speed][mode] < max_cost)
3471
    {
3472
      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3473
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3474
                          unsignedp, OPTAB_DIRECT);
3475
      if (tem)
3476
        return tem;
3477
    }
3478
 
3479
  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3480
     Need to adjust the result after the multiplication.  */
3481
  if (size - 1 < BITS_PER_WORD
3482
      && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3483
          + 4 * add_cost[speed][mode] < max_cost))
3484
    {
3485
      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3486
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3487
                          unsignedp, OPTAB_DIRECT);
3488
      if (tem)
3489
        /* We used the wrong signedness.  Adjust the result.  */
3490
        return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3491
                                            tem, unsignedp);
3492
    }
3493
 
3494
  /* Try widening multiplication.  */
3495
  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3496
  if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3497
      && mul_widen_cost[speed][wider_mode] < max_cost)
3498
    {
3499
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3500
                          unsignedp, OPTAB_WIDEN);
3501
      if (tem)
3502
        return extract_high_half (mode, tem);
3503
    }
3504
 
3505
  /* Try widening the mode and perform a non-widening multiplication.  */
3506
  if (optab_handler (smul_optab, wider_mode)->insn_code != CODE_FOR_nothing
3507
      && size - 1 < BITS_PER_WORD
3508
      && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3509
    {
3510
      rtx insns, wop0, wop1;
3511
 
3512
      /* We need to widen the operands, for example to ensure the
3513
         constant multiplier is correctly sign or zero extended.
3514
         Use a sequence to clean-up any instructions emitted by
3515
         the conversions if things don't work out.  */
3516
      start_sequence ();
3517
      wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3518
      wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3519
      tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3520
                          unsignedp, OPTAB_WIDEN);
3521
      insns = get_insns ();
3522
      end_sequence ();
3523
 
3524
      if (tem)
3525
        {
3526
          emit_insn (insns);
3527
          return extract_high_half (mode, tem);
3528
        }
3529
    }
3530
 
3531
  /* Try widening multiplication of opposite signedness, and adjust.  */
3532
  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3533
  if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3534
      && size - 1 < BITS_PER_WORD
3535
      && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3536
          + 4 * add_cost[speed][mode] < max_cost))
3537
    {
3538
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3539
                          NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3540
      if (tem != 0)
3541
        {
3542
          tem = extract_high_half (mode, tem);
3543
          /* We used the wrong signedness.  Adjust the result.  */
3544
          return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3545
                                              target, unsignedp);
3546
        }
3547
    }
3548
 
3549
  return 0;
3550
}
3551
 
3552
/* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3553
   putting the high half of the result in TARGET if that is convenient,
3554
   and return where the result is.  If the operation can not be performed,
3555
 
3556
 
3557
   MODE is the mode of operation and result.
3558
 
3559
   UNSIGNEDP nonzero means unsigned multiply.
3560
 
3561
   MAX_COST is the total allowed cost for the expanded RTL.  */
3562
 
3563
static rtx
3564
expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3565
                      rtx target, int unsignedp, int max_cost)
3566
{
3567
  enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3568
  unsigned HOST_WIDE_INT cnst1;
3569
  int extra_cost;
3570
  bool sign_adjust = false;
3571
  enum mult_variant variant;
3572
  struct algorithm alg;
3573
  rtx tem;
3574
  bool speed = optimize_insn_for_speed_p ();
3575
 
3576
  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3577
  /* We can't support modes wider than HOST_BITS_PER_INT.  */
3578
  gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3579
 
3580
  cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3581
 
3582
  /* We can't optimize modes wider than BITS_PER_WORD.
3583
     ??? We might be able to perform double-word arithmetic if
3584
     mode == word_mode, however all the cost calculations in
3585
     synth_mult etc. assume single-word operations.  */
3586
  if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3587
    return expand_mult_highpart_optab (mode, op0, op1, target,
3588
                                       unsignedp, max_cost);
3589
 
3590
  extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3591
 
3592
  /* Check whether we try to multiply by a negative constant.  */
3593
  if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3594
    {
3595
      sign_adjust = true;
3596
      extra_cost += add_cost[speed][mode];
3597
    }
3598
 
3599
  /* See whether shift/add multiplication is cheap enough.  */
3600
  if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3601
                           max_cost - extra_cost))
3602
    {
3603
      /* See whether the specialized multiplication optabs are
3604
         cheaper than the shift/add version.  */
3605
      tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3606
                                        alg.cost.cost + extra_cost);
3607
      if (tem)
3608
        return tem;
3609
 
3610
      tem = convert_to_mode (wider_mode, op0, unsignedp);
3611
      tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3612
      tem = extract_high_half (mode, tem);
3613
 
3614
      /* Adjust result for signedness.  */
3615
      if (sign_adjust)
3616
        tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3617
 
3618
      return tem;
3619
    }
3620
  return expand_mult_highpart_optab (mode, op0, op1, target,
3621
                                     unsignedp, max_cost);
3622
}
3623
 
3624
 
3625
/* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3626
 
3627
static rtx
3628
expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3629
{
3630
  unsigned HOST_WIDE_INT masklow, maskhigh;
3631
  rtx result, temp, shift, label;
3632
  int logd;
3633
 
3634
  logd = floor_log2 (d);
3635
  result = gen_reg_rtx (mode);
3636
 
3637
  /* Avoid conditional branches when they're expensive.  */
3638
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3639
      && optimize_insn_for_speed_p ())
3640
    {
3641
      rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3642
                                      mode, 0, -1);
3643
      if (signmask)
3644
        {
3645
          signmask = force_reg (mode, signmask);
3646
          masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3647
          shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3648
 
3649
          /* Use the rtx_cost of a LSHIFTRT instruction to determine
3650
             which instruction sequence to use.  If logical right shifts
3651
             are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3652
             use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3653
 
3654
          temp = gen_rtx_LSHIFTRT (mode, result, shift);
3655
          if (optab_handler (lshr_optab, mode)->insn_code == CODE_FOR_nothing
3656
              || rtx_cost (temp, SET, optimize_insn_for_speed_p ()) > COSTS_N_INSNS (2))
3657
            {
3658
              temp = expand_binop (mode, xor_optab, op0, signmask,
3659
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3660
              temp = expand_binop (mode, sub_optab, temp, signmask,
3661
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3662
              temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3663
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3664
              temp = expand_binop (mode, xor_optab, temp, signmask,
3665
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3666
              temp = expand_binop (mode, sub_optab, temp, signmask,
3667
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3668
            }
3669
          else
3670
            {
3671
              signmask = expand_binop (mode, lshr_optab, signmask, shift,
3672
                                       NULL_RTX, 1, OPTAB_LIB_WIDEN);
3673
              signmask = force_reg (mode, signmask);
3674
 
3675
              temp = expand_binop (mode, add_optab, op0, signmask,
3676
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3677
              temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3678
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3679
              temp = expand_binop (mode, sub_optab, temp, signmask,
3680
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3681
            }
3682
          return temp;
3683
        }
3684
    }
3685
 
3686
  /* Mask contains the mode's signbit and the significant bits of the
3687
     modulus.  By including the signbit in the operation, many targets
3688
     can avoid an explicit compare operation in the following comparison
3689
     against zero.  */
3690
 
3691
  masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3692
  if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3693
    {
3694
      masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3695
      maskhigh = -1;
3696
    }
3697
  else
3698
    maskhigh = (HOST_WIDE_INT) -1
3699
                 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3700
 
3701
  temp = expand_binop (mode, and_optab, op0,
3702
                       immed_double_const (masklow, maskhigh, mode),
3703
                       result, 1, OPTAB_LIB_WIDEN);
3704
  if (temp != result)
3705
    emit_move_insn (result, temp);
3706
 
3707
  label = gen_label_rtx ();
3708
  do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3709
 
3710
  temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3711
                       0, OPTAB_LIB_WIDEN);
3712
  masklow = (HOST_WIDE_INT) -1 << logd;
3713
  maskhigh = -1;
3714
  temp = expand_binop (mode, ior_optab, temp,
3715
                       immed_double_const (masklow, maskhigh, mode),
3716
                       result, 1, OPTAB_LIB_WIDEN);
3717
  temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3718
                       0, OPTAB_LIB_WIDEN);
3719
  if (temp != result)
3720
    emit_move_insn (result, temp);
3721
  emit_label (label);
3722
  return result;
3723
}
3724
 
3725
/* Expand signed division of OP0 by a power of two D in mode MODE.
3726
   This routine is only called for positive values of D.  */
3727
 
3728
static rtx
3729
expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3730
{
3731
  rtx temp, label;
3732
  tree shift;
3733
  int logd;
3734
 
3735
  logd = floor_log2 (d);
3736
  shift = build_int_cst (NULL_TREE, logd);
3737
 
3738
  if (d == 2
3739
      && BRANCH_COST (optimize_insn_for_speed_p (),
3740
                      false) >= 1)
3741
    {
3742
      temp = gen_reg_rtx (mode);
3743
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3744
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3745
                           0, OPTAB_LIB_WIDEN);
3746
      return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3747
    }
3748
 
3749
#ifdef HAVE_conditional_move
3750
  if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3751
      >= 2)
3752
    {
3753
      rtx temp2;
3754
 
3755
      /* ??? emit_conditional_move forces a stack adjustment via
3756
         compare_from_rtx so, if the sequence is discarded, it will
3757
         be lost.  Do it now instead.  */
3758
      do_pending_stack_adjust ();
3759
 
3760
      start_sequence ();
3761
      temp2 = copy_to_mode_reg (mode, op0);
3762
      temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3763
                           NULL_RTX, 0, OPTAB_LIB_WIDEN);
3764
      temp = force_reg (mode, temp);
3765
 
3766
      /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3767
      temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3768
                                     mode, temp, temp2, mode, 0);
3769
      if (temp2)
3770
        {
3771
          rtx seq = get_insns ();
3772
          end_sequence ();
3773
          emit_insn (seq);
3774
          return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3775
        }
3776
      end_sequence ();
3777
    }
3778
#endif
3779
 
3780
  if (BRANCH_COST (optimize_insn_for_speed_p (),
3781
                   false) >= 2)
3782
    {
3783
      int ushift = GET_MODE_BITSIZE (mode) - logd;
3784
 
3785
      temp = gen_reg_rtx (mode);
3786
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3787
      if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3788
        temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3789
                             NULL_RTX, 0, OPTAB_LIB_WIDEN);
3790
      else
3791
        temp = expand_shift (RSHIFT_EXPR, mode, temp,
3792
                             build_int_cst (NULL_TREE, ushift),
3793
                             NULL_RTX, 1);
3794
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3795
                           0, OPTAB_LIB_WIDEN);
3796
      return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3797
    }
3798
 
3799
  label = gen_label_rtx ();
3800
  temp = copy_to_mode_reg (mode, op0);
3801
  do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3802
  expand_inc (temp, GEN_INT (d - 1));
3803
  emit_label (label);
3804
  return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3805
}
3806
 
3807
/* Emit the code to divide OP0 by OP1, putting the result in TARGET
3808
   if that is convenient, and returning where the result is.
3809
   You may request either the quotient or the remainder as the result;
3810
   specify REM_FLAG nonzero to get the remainder.
3811
 
3812
   CODE is the expression code for which kind of division this is;
3813
   it controls how rounding is done.  MODE is the machine mode to use.
3814
   UNSIGNEDP nonzero means do unsigned division.  */
3815
 
3816
/* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3817
   and then correct it by or'ing in missing high bits
3818
   if result of ANDI is nonzero.
3819
   For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3820
   This could optimize to a bfexts instruction.
3821
   But C doesn't use these operations, so their optimizations are
3822
   left for later.  */
3823
/* ??? For modulo, we don't actually need the highpart of the first product,
3824
   the low part will do nicely.  And for small divisors, the second multiply
3825
   can also be a low-part only multiply or even be completely left out.
3826
   E.g. to calculate the remainder of a division by 3 with a 32 bit
3827
   multiply, multiply with 0x55555556 and extract the upper two bits;
3828
   the result is exact for inputs up to 0x1fffffff.
3829
   The input range can be reduced by using cross-sum rules.
3830
   For odd divisors >= 3, the following table gives right shift counts
3831
   so that if a number is shifted by an integer multiple of the given
3832
   amount, the remainder stays the same:
3833
   2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3834
   14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3835
   0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3836
   20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3837
   0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3838
 
3839
   Cross-sum rules for even numbers can be derived by leaving as many bits
3840
   to the right alone as the divisor has zeros to the right.
3841
   E.g. if x is an unsigned 32 bit number:
3842
   (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3843
   */
3844
 
3845
rtx
3846
expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3847
               rtx op0, rtx op1, rtx target, int unsignedp)
3848
{
3849
  enum machine_mode compute_mode;
3850
  rtx tquotient;
3851
  rtx quotient = 0, remainder = 0;
3852
  rtx last;
3853
  int size;
3854
  rtx insn, set;
3855
  optab optab1, optab2;
3856
  int op1_is_constant, op1_is_pow2 = 0;
3857
  int max_cost, extra_cost;
3858
  static HOST_WIDE_INT last_div_const = 0;
3859
  static HOST_WIDE_INT ext_op1;
3860
  bool speed = optimize_insn_for_speed_p ();
3861
 
3862
  op1_is_constant = CONST_INT_P (op1);
3863
  if (op1_is_constant)
3864
    {
3865
      ext_op1 = INTVAL (op1);
3866
      if (unsignedp)
3867
        ext_op1 &= GET_MODE_MASK (mode);
3868
      op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3869
                     || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3870
    }
3871
 
3872
  /*
3873
     This is the structure of expand_divmod:
3874
 
3875
     First comes code to fix up the operands so we can perform the operations
3876
     correctly and efficiently.
3877
 
3878
     Second comes a switch statement with code specific for each rounding mode.
3879
     For some special operands this code emits all RTL for the desired
3880
     operation, for other cases, it generates only a quotient and stores it in
3881
     QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3882
     to indicate that it has not done anything.
3883
 
3884
     Last comes code that finishes the operation.  If QUOTIENT is set and
3885
     REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3886
     QUOTIENT is not set, it is computed using trunc rounding.
3887
 
3888
     We try to generate special code for division and remainder when OP1 is a
3889
     constant.  If |OP1| = 2**n we can use shifts and some other fast
3890
     operations.  For other values of OP1, we compute a carefully selected
3891
     fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3892
     by m.
3893
 
3894
     In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3895
     half of the product.  Different strategies for generating the product are
3896
     implemented in expand_mult_highpart.
3897
 
3898
     If what we actually want is the remainder, we generate that by another
3899
     by-constant multiplication and a subtraction.  */
3900
 
3901
  /* We shouldn't be called with OP1 == const1_rtx, but some of the
3902
     code below will malfunction if we are, so check here and handle
3903
     the special case if so.  */
3904
  if (op1 == const1_rtx)
3905
    return rem_flag ? const0_rtx : op0;
3906
 
3907
    /* When dividing by -1, we could get an overflow.
3908
     negv_optab can handle overflows.  */
3909
  if (! unsignedp && op1 == constm1_rtx)
3910
    {
3911
      if (rem_flag)
3912
        return const0_rtx;
3913
      return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3914
                          ? negv_optab : neg_optab, op0, target, 0);
3915
    }
3916
 
3917
  if (target
3918
      /* Don't use the function value register as a target
3919
         since we have to read it as well as write it,
3920
         and function-inlining gets confused by this.  */
3921
      && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3922
          /* Don't clobber an operand while doing a multi-step calculation.  */
3923
          || ((rem_flag || op1_is_constant)
3924
              && (reg_mentioned_p (target, op0)
3925
                  || (MEM_P (op0) && MEM_P (target))))
3926
          || reg_mentioned_p (target, op1)
3927
          || (MEM_P (op1) && MEM_P (target))))
3928
    target = 0;
3929
 
3930
  /* Get the mode in which to perform this computation.  Normally it will
3931
     be MODE, but sometimes we can't do the desired operation in MODE.
3932
     If so, pick a wider mode in which we can do the operation.  Convert
3933
     to that mode at the start to avoid repeated conversions.
3934
 
3935
     First see what operations we need.  These depend on the expression
3936
     we are evaluating.  (We assume that divxx3 insns exist under the
3937
     same conditions that modxx3 insns and that these insns don't normally
3938
     fail.  If these assumptions are not correct, we may generate less
3939
     efficient code in some cases.)
3940
 
3941
     Then see if we find a mode in which we can open-code that operation
3942
     (either a division, modulus, or shift).  Finally, check for the smallest
3943
     mode for which we can do the operation with a library call.  */
3944
 
3945
  /* We might want to refine this now that we have division-by-constant
3946
     optimization.  Since expand_mult_highpart tries so many variants, it is
3947
     not straightforward to generalize this.  Maybe we should make an array
3948
     of possible modes in init_expmed?  Save this for GCC 2.7.  */
3949
 
3950
  optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3951
            ? (unsignedp ? lshr_optab : ashr_optab)
3952
            : (unsignedp ? udiv_optab : sdiv_optab));
3953
  optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3954
            ? optab1
3955
            : (unsignedp ? udivmod_optab : sdivmod_optab));
3956
 
3957
  for (compute_mode = mode; compute_mode != VOIDmode;
3958
       compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3959
    if (optab_handler (optab1, compute_mode)->insn_code != CODE_FOR_nothing
3960
        || optab_handler (optab2, compute_mode)->insn_code != CODE_FOR_nothing)
3961
      break;
3962
 
3963
  if (compute_mode == VOIDmode)
3964
    for (compute_mode = mode; compute_mode != VOIDmode;
3965
         compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3966
      if (optab_libfunc (optab1, compute_mode)
3967
          || optab_libfunc (optab2, compute_mode))
3968
        break;
3969
 
3970
  /* If we still couldn't find a mode, use MODE, but expand_binop will
3971
     probably die.  */
3972
  if (compute_mode == VOIDmode)
3973
    compute_mode = mode;
3974
 
3975
  if (target && GET_MODE (target) == compute_mode)
3976
    tquotient = target;
3977
  else
3978
    tquotient = gen_reg_rtx (compute_mode);
3979
 
3980
  size = GET_MODE_BITSIZE (compute_mode);
3981
#if 0
3982
  /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3983
     (mode), and thereby get better code when OP1 is a constant.  Do that
3984
     later.  It will require going over all usages of SIZE below.  */
3985
  size = GET_MODE_BITSIZE (mode);
3986
#endif
3987
 
3988
  /* Only deduct something for a REM if the last divide done was
3989
     for a different constant.   Then set the constant of the last
3990
     divide.  */
3991
  max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
3992
  if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3993
                     && INTVAL (op1) == last_div_const))
3994
    max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
3995
 
3996
  last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3997
 
3998
  /* Now convert to the best mode to use.  */
3999
  if (compute_mode != mode)
4000
    {
4001
      op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4002
      op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4003
 
4004
      /* convert_modes may have placed op1 into a register, so we
4005
         must recompute the following.  */
4006
      op1_is_constant = CONST_INT_P (op1);
4007
      op1_is_pow2 = (op1_is_constant
4008
                     && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4009
                          || (! unsignedp
4010
                              && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
4011
    }
4012
 
4013
  /* If one of the operands is a volatile MEM, copy it into a register.  */
4014
 
4015
  if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4016
    op0 = force_reg (compute_mode, op0);
4017
  if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4018
    op1 = force_reg (compute_mode, op1);
4019
 
4020
  /* If we need the remainder or if OP1 is constant, we need to
4021
     put OP0 in a register in case it has any queued subexpressions.  */
4022
  if (rem_flag || op1_is_constant)
4023
    op0 = force_reg (compute_mode, op0);
4024
 
4025
  last = get_last_insn ();
4026
 
4027
  /* Promote floor rounding to trunc rounding for unsigned operations.  */
4028
  if (unsignedp)
4029
    {
4030
      if (code == FLOOR_DIV_EXPR)
4031
        code = TRUNC_DIV_EXPR;
4032
      if (code == FLOOR_MOD_EXPR)
4033
        code = TRUNC_MOD_EXPR;
4034
      if (code == EXACT_DIV_EXPR && op1_is_pow2)
4035
        code = TRUNC_DIV_EXPR;
4036
    }
4037
 
4038
  if (op1 != const0_rtx)
4039
    switch (code)
4040
      {
4041
      case TRUNC_MOD_EXPR:
4042
      case TRUNC_DIV_EXPR:
4043
        if (op1_is_constant)
4044
          {
4045
            if (unsignedp)
4046
              {
4047
                unsigned HOST_WIDE_INT mh;
4048
                int pre_shift, post_shift;
4049
                int dummy;
4050
                rtx ml;
4051
                unsigned HOST_WIDE_INT d = (INTVAL (op1)
4052
                                            & GET_MODE_MASK (compute_mode));
4053
 
4054
                if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4055
                  {
4056
                    pre_shift = floor_log2 (d);
4057
                    if (rem_flag)
4058
                      {
4059
                        remainder
4060
                          = expand_binop (compute_mode, and_optab, op0,
4061
                                          GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4062
                                          remainder, 1,
4063
                                          OPTAB_LIB_WIDEN);
4064
                        if (remainder)
4065
                          return gen_lowpart (mode, remainder);
4066
                      }
4067
                    quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4068
                                             build_int_cst (NULL_TREE,
4069
                                                            pre_shift),
4070
                                             tquotient, 1);
4071
                  }
4072
                else if (size <= HOST_BITS_PER_WIDE_INT)
4073
                  {
4074
                    if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4075
                      {
4076
                        /* Most significant bit of divisor is set; emit an scc
4077
                           insn.  */
4078
                        quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4079
                                                          compute_mode, 1, 1);
4080
                      }
4081
                    else
4082
                      {
4083
                        /* Find a suitable multiplier and right shift count
4084
                           instead of multiplying with D.  */
4085
 
4086
                        mh = choose_multiplier (d, size, size,
4087
                                                &ml, &post_shift, &dummy);
4088
 
4089
                        /* If the suggested multiplier is more than SIZE bits,
4090
                           we can do better for even divisors, using an
4091
                           initial right shift.  */
4092
                        if (mh != 0 && (d & 1) == 0)
4093
                          {
4094
                            pre_shift = floor_log2 (d & -d);
4095
                            mh = choose_multiplier (d >> pre_shift, size,
4096
                                                    size - pre_shift,
4097
                                                    &ml, &post_shift, &dummy);
4098
                            gcc_assert (!mh);
4099
                          }
4100
                        else
4101
                          pre_shift = 0;
4102
 
4103
                        if (mh != 0)
4104
                          {
4105
                            rtx t1, t2, t3, t4;
4106
 
4107
                            if (post_shift - 1 >= BITS_PER_WORD)
4108
                              goto fail1;
4109
 
4110
                            extra_cost
4111
                              = (shift_cost[speed][compute_mode][post_shift - 1]
4112
                                 + shift_cost[speed][compute_mode][1]
4113
                                 + 2 * add_cost[speed][compute_mode]);
4114
                            t1 = expand_mult_highpart (compute_mode, op0, ml,
4115
                                                       NULL_RTX, 1,
4116
                                                       max_cost - extra_cost);
4117
                            if (t1 == 0)
4118
                              goto fail1;
4119
                            t2 = force_operand (gen_rtx_MINUS (compute_mode,
4120
                                                               op0, t1),
4121
                                                NULL_RTX);
4122
                            t3 = expand_shift
4123
                              (RSHIFT_EXPR, compute_mode, t2,
4124
                               build_int_cst (NULL_TREE, 1),
4125
                               NULL_RTX,1);
4126
                            t4 = force_operand (gen_rtx_PLUS (compute_mode,
4127
                                                              t1, t3),
4128
                                                NULL_RTX);
4129
                            quotient = expand_shift
4130
                              (RSHIFT_EXPR, compute_mode, t4,
4131
                               build_int_cst (NULL_TREE, post_shift - 1),
4132
                               tquotient, 1);
4133
                          }
4134
                        else
4135
                          {
4136
                            rtx t1, t2;
4137
 
4138
                            if (pre_shift >= BITS_PER_WORD
4139
                                || post_shift >= BITS_PER_WORD)
4140
                              goto fail1;
4141
 
4142
                            t1 = expand_shift
4143
                              (RSHIFT_EXPR, compute_mode, op0,
4144
                               build_int_cst (NULL_TREE, pre_shift),
4145
                               NULL_RTX, 1);
4146
                            extra_cost
4147
                              = (shift_cost[speed][compute_mode][pre_shift]
4148
                                 + shift_cost[speed][compute_mode][post_shift]);
4149
                            t2 = expand_mult_highpart (compute_mode, t1, ml,
4150
                                                       NULL_RTX, 1,
4151
                                                       max_cost - extra_cost);
4152
                            if (t2 == 0)
4153
                              goto fail1;
4154
                            quotient = expand_shift
4155
                              (RSHIFT_EXPR, compute_mode, t2,
4156
                               build_int_cst (NULL_TREE, post_shift),
4157
                               tquotient, 1);
4158
                          }
4159
                      }
4160
                  }
4161
                else            /* Too wide mode to use tricky code */
4162
                  break;
4163
 
4164
                insn = get_last_insn ();
4165
                if (insn != last
4166
                    && (set = single_set (insn)) != 0
4167
                    && SET_DEST (set) == quotient)
4168
                  set_unique_reg_note (insn,
4169
                                       REG_EQUAL,
4170
                                       gen_rtx_UDIV (compute_mode, op0, op1));
4171
              }
4172
            else                /* TRUNC_DIV, signed */
4173
              {
4174
                unsigned HOST_WIDE_INT ml;
4175
                int lgup, post_shift;
4176
                rtx mlr;
4177
                HOST_WIDE_INT d = INTVAL (op1);
4178
                unsigned HOST_WIDE_INT abs_d;
4179
 
4180
                /* Since d might be INT_MIN, we have to cast to
4181
                   unsigned HOST_WIDE_INT before negating to avoid
4182
                   undefined signed overflow.  */
4183
                abs_d = (d >= 0
4184
                         ? (unsigned HOST_WIDE_INT) d
4185
                         : - (unsigned HOST_WIDE_INT) d);
4186
 
4187
                /* n rem d = n rem -d */
4188
                if (rem_flag && d < 0)
4189
                  {
4190
                    d = abs_d;
4191
                    op1 = gen_int_mode (abs_d, compute_mode);
4192
                  }
4193
 
4194
                if (d == 1)
4195
                  quotient = op0;
4196
                else if (d == -1)
4197
                  quotient = expand_unop (compute_mode, neg_optab, op0,
4198
                                          tquotient, 0);
4199
                else if (HOST_BITS_PER_WIDE_INT >= size
4200
                         && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4201
                  {
4202
                    /* This case is not handled correctly below.  */
4203
                    quotient = emit_store_flag (tquotient, EQ, op0, op1,
4204
                                                compute_mode, 1, 1);
4205
                    if (quotient == 0)
4206
                      goto fail1;
4207
                  }
4208
                else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4209
                         && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4210
                                      : sdiv_pow2_cheap[speed][compute_mode])
4211
                         /* We assume that cheap metric is true if the
4212
                            optab has an expander for this mode.  */
4213
                         && ((optab_handler ((rem_flag ? smod_optab
4214
                                              : sdiv_optab),
4215
                                              compute_mode)->insn_code
4216
                              != CODE_FOR_nothing)
4217
                             || (optab_handler(sdivmod_optab,
4218
                                               compute_mode)
4219
                                 ->insn_code != CODE_FOR_nothing)))
4220
                  ;
4221
                else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4222
                  {
4223
                    if (rem_flag)
4224
                      {
4225
                        remainder = expand_smod_pow2 (compute_mode, op0, d);
4226
                        if (remainder)
4227
                          return gen_lowpart (mode, remainder);
4228
                      }
4229
 
4230
                    if (sdiv_pow2_cheap[speed][compute_mode]
4231
                        && ((optab_handler (sdiv_optab, compute_mode)->insn_code
4232
                             != CODE_FOR_nothing)
4233
                            || (optab_handler (sdivmod_optab, compute_mode)->insn_code
4234
                                != CODE_FOR_nothing)))
4235
                      quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4236
                                                compute_mode, op0,
4237
                                                gen_int_mode (abs_d,
4238
                                                              compute_mode),
4239
                                                NULL_RTX, 0);
4240
                    else
4241
                      quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4242
 
4243
                    /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4244
                       negate the quotient.  */
4245
                    if (d < 0)
4246
                      {
4247
                        insn = get_last_insn ();
4248
                        if (insn != last
4249
                            && (set = single_set (insn)) != 0
4250
                            && SET_DEST (set) == quotient
4251
                            && abs_d < ((unsigned HOST_WIDE_INT) 1
4252
                                        << (HOST_BITS_PER_WIDE_INT - 1)))
4253
                          set_unique_reg_note (insn,
4254
                                               REG_EQUAL,
4255
                                               gen_rtx_DIV (compute_mode,
4256
                                                            op0,
4257
                                                            GEN_INT
4258
                                                            (trunc_int_for_mode
4259
                                                             (abs_d,
4260
                                                              compute_mode))));
4261
 
4262
                        quotient = expand_unop (compute_mode, neg_optab,
4263
                                                quotient, quotient, 0);
4264
                      }
4265
                  }
4266
                else if (size <= HOST_BITS_PER_WIDE_INT)
4267
                  {
4268
                    choose_multiplier (abs_d, size, size - 1,
4269
                                       &mlr, &post_shift, &lgup);
4270
                    ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4271
                    if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4272
                      {
4273
                        rtx t1, t2, t3;
4274
 
4275
                        if (post_shift >= BITS_PER_WORD
4276
                            || size - 1 >= BITS_PER_WORD)
4277
                          goto fail1;
4278
 
4279
                        extra_cost = (shift_cost[speed][compute_mode][post_shift]
4280
                                      + shift_cost[speed][compute_mode][size - 1]
4281
                                      + add_cost[speed][compute_mode]);
4282
                        t1 = expand_mult_highpart (compute_mode, op0, mlr,
4283
                                                   NULL_RTX, 0,
4284
                                                   max_cost - extra_cost);
4285
                        if (t1 == 0)
4286
                          goto fail1;
4287
                        t2 = expand_shift
4288
                          (RSHIFT_EXPR, compute_mode, t1,
4289
                           build_int_cst (NULL_TREE, post_shift),
4290
                           NULL_RTX, 0);
4291
                        t3 = expand_shift
4292
                          (RSHIFT_EXPR, compute_mode, op0,
4293
                           build_int_cst (NULL_TREE, size - 1),
4294
                           NULL_RTX, 0);
4295
                        if (d < 0)
4296
                          quotient
4297
                            = force_operand (gen_rtx_MINUS (compute_mode,
4298
                                                            t3, t2),
4299
                                             tquotient);
4300
                        else
4301
                          quotient
4302
                            = force_operand (gen_rtx_MINUS (compute_mode,
4303
                                                            t2, t3),
4304
                                             tquotient);
4305
                      }
4306
                    else
4307
                      {
4308
                        rtx t1, t2, t3, t4;
4309
 
4310
                        if (post_shift >= BITS_PER_WORD
4311
                            || size - 1 >= BITS_PER_WORD)
4312
                          goto fail1;
4313
 
4314
                        ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4315
                        mlr = gen_int_mode (ml, compute_mode);
4316
                        extra_cost = (shift_cost[speed][compute_mode][post_shift]
4317
                                      + shift_cost[speed][compute_mode][size - 1]
4318
                                      + 2 * add_cost[speed][compute_mode]);
4319
                        t1 = expand_mult_highpart (compute_mode, op0, mlr,
4320
                                                   NULL_RTX, 0,
4321
                                                   max_cost - extra_cost);
4322
                        if (t1 == 0)
4323
                          goto fail1;
4324
                        t2 = force_operand (gen_rtx_PLUS (compute_mode,
4325
                                                          t1, op0),
4326
                                            NULL_RTX);
4327
                        t3 = expand_shift
4328
                          (RSHIFT_EXPR, compute_mode, t2,
4329
                           build_int_cst (NULL_TREE, post_shift),
4330
                           NULL_RTX, 0);
4331
                        t4 = expand_shift
4332
                          (RSHIFT_EXPR, compute_mode, op0,
4333
                           build_int_cst (NULL_TREE, size - 1),
4334
                           NULL_RTX, 0);
4335
                        if (d < 0)
4336
                          quotient
4337
                            = force_operand (gen_rtx_MINUS (compute_mode,
4338
                                                            t4, t3),
4339
                                             tquotient);
4340
                        else
4341
                          quotient
4342
                            = force_operand (gen_rtx_MINUS (compute_mode,
4343
                                                            t3, t4),
4344
                                             tquotient);
4345
                      }
4346
                  }
4347
                else            /* Too wide mode to use tricky code */
4348
                  break;
4349
 
4350
                insn = get_last_insn ();
4351
                if (insn != last
4352
                    && (set = single_set (insn)) != 0
4353
                    && SET_DEST (set) == quotient)
4354
                  set_unique_reg_note (insn,
4355
                                       REG_EQUAL,
4356
                                       gen_rtx_DIV (compute_mode, op0, op1));
4357
              }
4358
            break;
4359
          }
4360
      fail1:
4361
        delete_insns_since (last);
4362
        break;
4363
 
4364
      case FLOOR_DIV_EXPR:
4365
      case FLOOR_MOD_EXPR:
4366
      /* We will come here only for signed operations.  */
4367
        if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4368
          {
4369
            unsigned HOST_WIDE_INT mh;
4370
            int pre_shift, lgup, post_shift;
4371
            HOST_WIDE_INT d = INTVAL (op1);
4372
            rtx ml;
4373
 
4374
            if (d > 0)
4375
              {
4376
                /* We could just as easily deal with negative constants here,
4377
                   but it does not seem worth the trouble for GCC 2.6.  */
4378
                if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4379
                  {
4380
                    pre_shift = floor_log2 (d);
4381
                    if (rem_flag)
4382
                      {
4383
                        remainder = expand_binop (compute_mode, and_optab, op0,
4384
                                                  GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4385
                                                  remainder, 0, OPTAB_LIB_WIDEN);
4386
                        if (remainder)
4387
                          return gen_lowpart (mode, remainder);
4388
                      }
4389
                    quotient = expand_shift
4390
                      (RSHIFT_EXPR, compute_mode, op0,
4391
                       build_int_cst (NULL_TREE, pre_shift),
4392
                       tquotient, 0);
4393
                  }
4394
                else
4395
                  {
4396
                    rtx t1, t2, t3, t4;
4397
 
4398
                    mh = choose_multiplier (d, size, size - 1,
4399
                                            &ml, &post_shift, &lgup);
4400
                    gcc_assert (!mh);
4401
 
4402
                    if (post_shift < BITS_PER_WORD
4403
                        && size - 1 < BITS_PER_WORD)
4404
                      {
4405
                        t1 = expand_shift
4406
                          (RSHIFT_EXPR, compute_mode, op0,
4407
                           build_int_cst (NULL_TREE, size - 1),
4408
                           NULL_RTX, 0);
4409
                        t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4410
                                           NULL_RTX, 0, OPTAB_WIDEN);
4411
                        extra_cost = (shift_cost[speed][compute_mode][post_shift]
4412
                                      + shift_cost[speed][compute_mode][size - 1]
4413
                                      + 2 * add_cost[speed][compute_mode]);
4414
                        t3 = expand_mult_highpart (compute_mode, t2, ml,
4415
                                                   NULL_RTX, 1,
4416
                                                   max_cost - extra_cost);
4417
                        if (t3 != 0)
4418
                          {
4419
                            t4 = expand_shift
4420
                              (RSHIFT_EXPR, compute_mode, t3,
4421
                               build_int_cst (NULL_TREE, post_shift),
4422
                               NULL_RTX, 1);
4423
                            quotient = expand_binop (compute_mode, xor_optab,
4424
                                                     t4, t1, tquotient, 0,
4425
                                                     OPTAB_WIDEN);
4426
                          }
4427
                      }
4428
                  }
4429
              }
4430
            else
4431
              {
4432
                rtx nsign, t1, t2, t3, t4;
4433
                t1 = force_operand (gen_rtx_PLUS (compute_mode,
4434
                                                  op0, constm1_rtx), NULL_RTX);
4435
                t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4436
                                   0, OPTAB_WIDEN);
4437
                nsign = expand_shift
4438
                  (RSHIFT_EXPR, compute_mode, t2,
4439
                   build_int_cst (NULL_TREE, size - 1),
4440
                   NULL_RTX, 0);
4441
                t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4442
                                    NULL_RTX);
4443
                t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4444
                                    NULL_RTX, 0);
4445
                if (t4)
4446
                  {
4447
                    rtx t5;
4448
                    t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4449
                                      NULL_RTX, 0);
4450
                    quotient = force_operand (gen_rtx_PLUS (compute_mode,
4451
                                                            t4, t5),
4452
                                              tquotient);
4453
                  }
4454
              }
4455
          }
4456
 
4457
        if (quotient != 0)
4458
          break;
4459
        delete_insns_since (last);
4460
 
4461
        /* Try using an instruction that produces both the quotient and
4462
           remainder, using truncation.  We can easily compensate the quotient
4463
           or remainder to get floor rounding, once we have the remainder.
4464
           Notice that we compute also the final remainder value here,
4465
           and return the result right away.  */
4466
        if (target == 0 || GET_MODE (target) != compute_mode)
4467
          target = gen_reg_rtx (compute_mode);
4468
 
4469
        if (rem_flag)
4470
          {
4471
            remainder
4472
              = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4473
            quotient = gen_reg_rtx (compute_mode);
4474
          }
4475
        else
4476
          {
4477
            quotient
4478
              = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4479
            remainder = gen_reg_rtx (compute_mode);
4480
          }
4481
 
4482
        if (expand_twoval_binop (sdivmod_optab, op0, op1,
4483
                                 quotient, remainder, 0))
4484
          {
4485
            /* This could be computed with a branch-less sequence.
4486
               Save that for later.  */
4487
            rtx tem;
4488
            rtx label = gen_label_rtx ();
4489
            do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4490
            tem = expand_binop (compute_mode, xor_optab, op0, op1,
4491
                                NULL_RTX, 0, OPTAB_WIDEN);
4492
            do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4493
            expand_dec (quotient, const1_rtx);
4494
            expand_inc (remainder, op1);
4495
            emit_label (label);
4496
            return gen_lowpart (mode, rem_flag ? remainder : quotient);
4497
          }
4498
 
4499
        /* No luck with division elimination or divmod.  Have to do it
4500
           by conditionally adjusting op0 *and* the result.  */
4501
        {
4502
          rtx label1, label2, label3, label4, label5;
4503
          rtx adjusted_op0;
4504
          rtx tem;
4505
 
4506
          quotient = gen_reg_rtx (compute_mode);
4507
          adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4508
          label1 = gen_label_rtx ();
4509
          label2 = gen_label_rtx ();
4510
          label3 = gen_label_rtx ();
4511
          label4 = gen_label_rtx ();
4512
          label5 = gen_label_rtx ();
4513
          do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4514
          do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4515
          tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4516
                              quotient, 0, OPTAB_LIB_WIDEN);
4517
          if (tem != quotient)
4518
            emit_move_insn (quotient, tem);
4519
          emit_jump_insn (gen_jump (label5));
4520
          emit_barrier ();
4521
          emit_label (label1);
4522
          expand_inc (adjusted_op0, const1_rtx);
4523
          emit_jump_insn (gen_jump (label4));
4524
          emit_barrier ();
4525
          emit_label (label2);
4526
          do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4527
          tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4528
                              quotient, 0, OPTAB_LIB_WIDEN);
4529
          if (tem != quotient)
4530
            emit_move_insn (quotient, tem);
4531
          emit_jump_insn (gen_jump (label5));
4532
          emit_barrier ();
4533
          emit_label (label3);
4534
          expand_dec (adjusted_op0, const1_rtx);
4535
          emit_label (label4);
4536
          tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4537
                              quotient, 0, OPTAB_LIB_WIDEN);
4538
          if (tem != quotient)
4539
            emit_move_insn (quotient, tem);
4540
          expand_dec (quotient, const1_rtx);
4541
          emit_label (label5);
4542
        }
4543
        break;
4544
 
4545
      case CEIL_DIV_EXPR:
4546
      case CEIL_MOD_EXPR:
4547
        if (unsignedp)
4548
          {
4549
            if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4550
              {
4551
                rtx t1, t2, t3;
4552
                unsigned HOST_WIDE_INT d = INTVAL (op1);
4553
                t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4554
                                   build_int_cst (NULL_TREE, floor_log2 (d)),
4555
                                   tquotient, 1);
4556
                t2 = expand_binop (compute_mode, and_optab, op0,
4557
                                   GEN_INT (d - 1),
4558
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4559
                t3 = gen_reg_rtx (compute_mode);
4560
                t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4561
                                      compute_mode, 1, 1);
4562
                if (t3 == 0)
4563
                  {
4564
                    rtx lab;
4565
                    lab = gen_label_rtx ();
4566
                    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4567
                    expand_inc (t1, const1_rtx);
4568
                    emit_label (lab);
4569
                    quotient = t1;
4570
                  }
4571
                else
4572
                  quotient = force_operand (gen_rtx_PLUS (compute_mode,
4573
                                                          t1, t3),
4574
                                            tquotient);
4575
                break;
4576
              }
4577
 
4578
            /* Try using an instruction that produces both the quotient and
4579
               remainder, using truncation.  We can easily compensate the
4580
               quotient or remainder to get ceiling rounding, once we have the
4581
               remainder.  Notice that we compute also the final remainder
4582
               value here, and return the result right away.  */
4583
            if (target == 0 || GET_MODE (target) != compute_mode)
4584
              target = gen_reg_rtx (compute_mode);
4585
 
4586
            if (rem_flag)
4587
              {
4588
                remainder = (REG_P (target)
4589
                             ? target : gen_reg_rtx (compute_mode));
4590
                quotient = gen_reg_rtx (compute_mode);
4591
              }
4592
            else
4593
              {
4594
                quotient = (REG_P (target)
4595
                            ? target : gen_reg_rtx (compute_mode));
4596
                remainder = gen_reg_rtx (compute_mode);
4597
              }
4598
 
4599
            if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4600
                                     remainder, 1))
4601
              {
4602
                /* This could be computed with a branch-less sequence.
4603
                   Save that for later.  */
4604
                rtx label = gen_label_rtx ();
4605
                do_cmp_and_jump (remainder, const0_rtx, EQ,
4606
                                 compute_mode, label);
4607
                expand_inc (quotient, const1_rtx);
4608
                expand_dec (remainder, op1);
4609
                emit_label (label);
4610
                return gen_lowpart (mode, rem_flag ? remainder : quotient);
4611
              }
4612
 
4613
            /* No luck with division elimination or divmod.  Have to do it
4614
               by conditionally adjusting op0 *and* the result.  */
4615
            {
4616
              rtx label1, label2;
4617
              rtx adjusted_op0, tem;
4618
 
4619
              quotient = gen_reg_rtx (compute_mode);
4620
              adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4621
              label1 = gen_label_rtx ();
4622
              label2 = gen_label_rtx ();
4623
              do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4624
                               compute_mode, label1);
4625
              emit_move_insn  (quotient, const0_rtx);
4626
              emit_jump_insn (gen_jump (label2));
4627
              emit_barrier ();
4628
              emit_label (label1);
4629
              expand_dec (adjusted_op0, const1_rtx);
4630
              tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4631
                                  quotient, 1, OPTAB_LIB_WIDEN);
4632
              if (tem != quotient)
4633
                emit_move_insn (quotient, tem);
4634
              expand_inc (quotient, const1_rtx);
4635
              emit_label (label2);
4636
            }
4637
          }
4638
        else /* signed */
4639
          {
4640
            if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4641
                && INTVAL (op1) >= 0)
4642
              {
4643
                /* This is extremely similar to the code for the unsigned case
4644
                   above.  For 2.7 we should merge these variants, but for
4645
                   2.6.1 I don't want to touch the code for unsigned since that
4646
                   get used in C.  The signed case will only be used by other
4647
                   languages (Ada).  */
4648
 
4649
                rtx t1, t2, t3;
4650
                unsigned HOST_WIDE_INT d = INTVAL (op1);
4651
                t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4652
                                   build_int_cst (NULL_TREE, floor_log2 (d)),
4653
                                   tquotient, 0);
4654
                t2 = expand_binop (compute_mode, and_optab, op0,
4655
                                   GEN_INT (d - 1),
4656
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4657
                t3 = gen_reg_rtx (compute_mode);
4658
                t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4659
                                      compute_mode, 1, 1);
4660
                if (t3 == 0)
4661
                  {
4662
                    rtx lab;
4663
                    lab = gen_label_rtx ();
4664
                    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4665
                    expand_inc (t1, const1_rtx);
4666
                    emit_label (lab);
4667
                    quotient = t1;
4668
                  }
4669
                else
4670
                  quotient = force_operand (gen_rtx_PLUS (compute_mode,
4671
                                                          t1, t3),
4672
                                            tquotient);
4673
                break;
4674
              }
4675
 
4676
            /* Try using an instruction that produces both the quotient and
4677
               remainder, using truncation.  We can easily compensate the
4678
               quotient or remainder to get ceiling rounding, once we have the
4679
               remainder.  Notice that we compute also the final remainder
4680
               value here, and return the result right away.  */
4681
            if (target == 0 || GET_MODE (target) != compute_mode)
4682
              target = gen_reg_rtx (compute_mode);
4683
            if (rem_flag)
4684
              {
4685
                remainder= (REG_P (target)
4686
                            ? target : gen_reg_rtx (compute_mode));
4687
                quotient = gen_reg_rtx (compute_mode);
4688
              }
4689
            else
4690
              {
4691
                quotient = (REG_P (target)
4692
                            ? target : gen_reg_rtx (compute_mode));
4693
                remainder = gen_reg_rtx (compute_mode);
4694
              }
4695
 
4696
            if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4697
                                     remainder, 0))
4698
              {
4699
                /* This could be computed with a branch-less sequence.
4700
                   Save that for later.  */
4701
                rtx tem;
4702
                rtx label = gen_label_rtx ();
4703
                do_cmp_and_jump (remainder, const0_rtx, EQ,
4704
                                 compute_mode, label);
4705
                tem = expand_binop (compute_mode, xor_optab, op0, op1,
4706
                                    NULL_RTX, 0, OPTAB_WIDEN);
4707
                do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4708
                expand_inc (quotient, const1_rtx);
4709
                expand_dec (remainder, op1);
4710
                emit_label (label);
4711
                return gen_lowpart (mode, rem_flag ? remainder : quotient);
4712
              }
4713
 
4714
            /* No luck with division elimination or divmod.  Have to do it
4715
               by conditionally adjusting op0 *and* the result.  */
4716
            {
4717
              rtx label1, label2, label3, label4, label5;
4718
              rtx adjusted_op0;
4719
              rtx tem;
4720
 
4721
              quotient = gen_reg_rtx (compute_mode);
4722
              adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4723
              label1 = gen_label_rtx ();
4724
              label2 = gen_label_rtx ();
4725
              label3 = gen_label_rtx ();
4726
              label4 = gen_label_rtx ();
4727
              label5 = gen_label_rtx ();
4728
              do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4729
              do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4730
                               compute_mode, label1);
4731
              tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4732
                                  quotient, 0, OPTAB_LIB_WIDEN);
4733
              if (tem != quotient)
4734
                emit_move_insn (quotient, tem);
4735
              emit_jump_insn (gen_jump (label5));
4736
              emit_barrier ();
4737
              emit_label (label1);
4738
              expand_dec (adjusted_op0, const1_rtx);
4739
              emit_jump_insn (gen_jump (label4));
4740
              emit_barrier ();
4741
              emit_label (label2);
4742
              do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4743
                               compute_mode, label3);
4744
              tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4745
                                  quotient, 0, OPTAB_LIB_WIDEN);
4746
              if (tem != quotient)
4747
                emit_move_insn (quotient, tem);
4748
              emit_jump_insn (gen_jump (label5));
4749
              emit_barrier ();
4750
              emit_label (label3);
4751
              expand_inc (adjusted_op0, const1_rtx);
4752
              emit_label (label4);
4753
              tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4754
                                  quotient, 0, OPTAB_LIB_WIDEN);
4755
              if (tem != quotient)
4756
                emit_move_insn (quotient, tem);
4757
              expand_inc (quotient, const1_rtx);
4758
              emit_label (label5);
4759
            }
4760
          }
4761
        break;
4762
 
4763
      case EXACT_DIV_EXPR:
4764
        if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4765
          {
4766
            HOST_WIDE_INT d = INTVAL (op1);
4767
            unsigned HOST_WIDE_INT ml;
4768
            int pre_shift;
4769
            rtx t1;
4770
 
4771
            pre_shift = floor_log2 (d & -d);
4772
            ml = invert_mod2n (d >> pre_shift, size);
4773
            t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4774
                               build_int_cst (NULL_TREE, pre_shift),
4775
                               NULL_RTX, unsignedp);
4776
            quotient = expand_mult (compute_mode, t1,
4777
                                    gen_int_mode (ml, compute_mode),
4778
                                    NULL_RTX, 1);
4779
 
4780
            insn = get_last_insn ();
4781
            set_unique_reg_note (insn,
4782
                                 REG_EQUAL,
4783
                                 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4784
                                                 compute_mode,
4785
                                                 op0, op1));
4786
          }
4787
        break;
4788
 
4789
      case ROUND_DIV_EXPR:
4790
      case ROUND_MOD_EXPR:
4791
        if (unsignedp)
4792
          {
4793
            rtx tem;
4794
            rtx label;
4795
            label = gen_label_rtx ();
4796
            quotient = gen_reg_rtx (compute_mode);
4797
            remainder = gen_reg_rtx (compute_mode);
4798
            if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4799
              {
4800
                rtx tem;
4801
                quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4802
                                         quotient, 1, OPTAB_LIB_WIDEN);
4803
                tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4804
                remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4805
                                          remainder, 1, OPTAB_LIB_WIDEN);
4806
              }
4807
            tem = plus_constant (op1, -1);
4808
            tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4809
                                build_int_cst (NULL_TREE, 1),
4810
                                NULL_RTX, 1);
4811
            do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4812
            expand_inc (quotient, const1_rtx);
4813
            expand_dec (remainder, op1);
4814
            emit_label (label);
4815
          }
4816
        else
4817
          {
4818
            rtx abs_rem, abs_op1, tem, mask;
4819
            rtx label;
4820
            label = gen_label_rtx ();
4821
            quotient = gen_reg_rtx (compute_mode);
4822
            remainder = gen_reg_rtx (compute_mode);
4823
            if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4824
              {
4825
                rtx tem;
4826
                quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4827
                                         quotient, 0, OPTAB_LIB_WIDEN);
4828
                tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4829
                remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4830
                                          remainder, 0, OPTAB_LIB_WIDEN);
4831
              }
4832
            abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4833
            abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4834
            tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4835
                                build_int_cst (NULL_TREE, 1),
4836
                                NULL_RTX, 1);
4837
            do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4838
            tem = expand_binop (compute_mode, xor_optab, op0, op1,
4839
                                NULL_RTX, 0, OPTAB_WIDEN);
4840
            mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4841
                                 build_int_cst (NULL_TREE, size - 1),
4842
                                 NULL_RTX, 0);
4843
            tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4844
                                NULL_RTX, 0, OPTAB_WIDEN);
4845
            tem = expand_binop (compute_mode, sub_optab, tem, mask,
4846
                                NULL_RTX, 0, OPTAB_WIDEN);
4847
            expand_inc (quotient, tem);
4848
            tem = expand_binop (compute_mode, xor_optab, mask, op1,
4849
                                NULL_RTX, 0, OPTAB_WIDEN);
4850
            tem = expand_binop (compute_mode, sub_optab, tem, mask,
4851
                                NULL_RTX, 0, OPTAB_WIDEN);
4852
            expand_dec (remainder, tem);
4853
            emit_label (label);
4854
          }
4855
        return gen_lowpart (mode, rem_flag ? remainder : quotient);
4856
 
4857
      default:
4858
        gcc_unreachable ();
4859
      }
4860
 
4861
  if (quotient == 0)
4862
    {
4863
      if (target && GET_MODE (target) != compute_mode)
4864
        target = 0;
4865
 
4866
      if (rem_flag)
4867
        {
4868
          /* Try to produce the remainder without producing the quotient.
4869
             If we seem to have a divmod pattern that does not require widening,
4870
             don't try widening here.  We should really have a WIDEN argument
4871
             to expand_twoval_binop, since what we'd really like to do here is
4872
             1) try a mod insn in compute_mode
4873
             2) try a divmod insn in compute_mode
4874
             3) try a div insn in compute_mode and multiply-subtract to get
4875
                remainder
4876
             4) try the same things with widening allowed.  */
4877
          remainder
4878
            = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4879
                                 op0, op1, target,
4880
                                 unsignedp,
4881
                                 ((optab_handler (optab2, compute_mode)->insn_code
4882
                                   != CODE_FOR_nothing)
4883
                                  ? OPTAB_DIRECT : OPTAB_WIDEN));
4884
          if (remainder == 0)
4885
            {
4886
              /* No luck there.  Can we do remainder and divide at once
4887
                 without a library call?  */
4888
              remainder = gen_reg_rtx (compute_mode);
4889
              if (! expand_twoval_binop ((unsignedp
4890
                                          ? udivmod_optab
4891
                                          : sdivmod_optab),
4892
                                         op0, op1,
4893
                                         NULL_RTX, remainder, unsignedp))
4894
                remainder = 0;
4895
            }
4896
 
4897
          if (remainder)
4898
            return gen_lowpart (mode, remainder);
4899
        }
4900
 
4901
      /* Produce the quotient.  Try a quotient insn, but not a library call.
4902
         If we have a divmod in this mode, use it in preference to widening
4903
         the div (for this test we assume it will not fail). Note that optab2
4904
         is set to the one of the two optabs that the call below will use.  */
4905
      quotient
4906
        = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4907
                             op0, op1, rem_flag ? NULL_RTX : target,
4908
                             unsignedp,
4909
                             ((optab_handler (optab2, compute_mode)->insn_code
4910
                               != CODE_FOR_nothing)
4911
                              ? OPTAB_DIRECT : OPTAB_WIDEN));
4912
 
4913
      if (quotient == 0)
4914
        {
4915
          /* No luck there.  Try a quotient-and-remainder insn,
4916
             keeping the quotient alone.  */
4917
          quotient = gen_reg_rtx (compute_mode);
4918
          if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4919
                                     op0, op1,
4920
                                     quotient, NULL_RTX, unsignedp))
4921
            {
4922
              quotient = 0;
4923
              if (! rem_flag)
4924
                /* Still no luck.  If we are not computing the remainder,
4925
                   use a library call for the quotient.  */
4926
                quotient = sign_expand_binop (compute_mode,
4927
                                              udiv_optab, sdiv_optab,
4928
                                              op0, op1, target,
4929
                                              unsignedp, OPTAB_LIB_WIDEN);
4930
            }
4931
        }
4932
    }
4933
 
4934
  if (rem_flag)
4935
    {
4936
      if (target && GET_MODE (target) != compute_mode)
4937
        target = 0;
4938
 
4939
      if (quotient == 0)
4940
        {
4941
          /* No divide instruction either.  Use library for remainder.  */
4942
          remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4943
                                         op0, op1, target,
4944
                                         unsignedp, OPTAB_LIB_WIDEN);
4945
          /* No remainder function.  Try a quotient-and-remainder
4946
             function, keeping the remainder.  */
4947
          if (!remainder)
4948
            {
4949
              remainder = gen_reg_rtx (compute_mode);
4950
              if (!expand_twoval_binop_libfunc
4951
                  (unsignedp ? udivmod_optab : sdivmod_optab,
4952
                   op0, op1,
4953
                   NULL_RTX, remainder,
4954
                   unsignedp ? UMOD : MOD))
4955
                remainder = NULL_RTX;
4956
            }
4957
        }
4958
      else
4959
        {
4960
          /* We divided.  Now finish doing X - Y * (X / Y).  */
4961
          remainder = expand_mult (compute_mode, quotient, op1,
4962
                                   NULL_RTX, unsignedp);
4963
          remainder = expand_binop (compute_mode, sub_optab, op0,
4964
                                    remainder, target, unsignedp,
4965
                                    OPTAB_LIB_WIDEN);
4966
        }
4967
    }
4968
 
4969
  return gen_lowpart (mode, rem_flag ? remainder : quotient);
4970
}
4971
 
4972
/* Return a tree node with data type TYPE, describing the value of X.
4973
   Usually this is an VAR_DECL, if there is no obvious better choice.
4974
   X may be an expression, however we only support those expressions
4975
   generated by loop.c.  */
4976
 
4977
tree
4978
make_tree (tree type, rtx x)
4979
{
4980
  tree t;
4981
 
4982
  switch (GET_CODE (x))
4983
    {
4984
    case CONST_INT:
4985
      {
4986
        HOST_WIDE_INT hi = 0;
4987
 
4988
        if (INTVAL (x) < 0
4989
            && !(TYPE_UNSIGNED (type)
4990
                 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4991
                     < HOST_BITS_PER_WIDE_INT)))
4992
          hi = -1;
4993
 
4994
        t = build_int_cst_wide (type, INTVAL (x), hi);
4995
 
4996
        return t;
4997
      }
4998
 
4999
    case CONST_DOUBLE:
5000
      if (GET_MODE (x) == VOIDmode)
5001
        t = build_int_cst_wide (type,
5002
                                CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
5003
      else
5004
        {
5005
          REAL_VALUE_TYPE d;
5006
 
5007
          REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5008
          t = build_real (type, d);
5009
        }
5010
 
5011
      return t;
5012
 
5013
    case CONST_VECTOR:
5014
      {
5015
        int units = CONST_VECTOR_NUNITS (x);
5016
        tree itype = TREE_TYPE (type);
5017
        tree t = NULL_TREE;
5018
        int i;
5019
 
5020
 
5021
        /* Build a tree with vector elements.  */
5022
        for (i = units - 1; i >= 0; --i)
5023
          {
5024
            rtx elt = CONST_VECTOR_ELT (x, i);
5025
            t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
5026
          }
5027
 
5028
        return build_vector (type, t);
5029
      }
5030
 
5031
    case PLUS:
5032
      return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5033
                          make_tree (type, XEXP (x, 1)));
5034
 
5035
    case MINUS:
5036
      return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5037
                          make_tree (type, XEXP (x, 1)));
5038
 
5039
    case NEG:
5040
      return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5041
 
5042
    case MULT:
5043
      return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5044
                          make_tree (type, XEXP (x, 1)));
5045
 
5046
    case ASHIFT:
5047
      return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5048
                          make_tree (type, XEXP (x, 1)));
5049
 
5050
    case LSHIFTRT:
5051
      t = unsigned_type_for (type);
5052
      return fold_convert (type, build2 (RSHIFT_EXPR, t,
5053
                                         make_tree (t, XEXP (x, 0)),
5054
                                         make_tree (type, XEXP (x, 1))));
5055
 
5056
    case ASHIFTRT:
5057
      t = signed_type_for (type);
5058
      return fold_convert (type, build2 (RSHIFT_EXPR, t,
5059
                                         make_tree (t, XEXP (x, 0)),
5060
                                         make_tree (type, XEXP (x, 1))));
5061
 
5062
    case DIV:
5063
      if (TREE_CODE (type) != REAL_TYPE)
5064
        t = signed_type_for (type);
5065
      else
5066
        t = type;
5067
 
5068
      return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5069
                                         make_tree (t, XEXP (x, 0)),
5070
                                         make_tree (t, XEXP (x, 1))));
5071
    case UDIV:
5072
      t = unsigned_type_for (type);
5073
      return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5074
                                         make_tree (t, XEXP (x, 0)),
5075
                                         make_tree (t, XEXP (x, 1))));
5076
 
5077
    case SIGN_EXTEND:
5078
    case ZERO_EXTEND:
5079
      t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5080
                                          GET_CODE (x) == ZERO_EXTEND);
5081
      return fold_convert (type, make_tree (t, XEXP (x, 0)));
5082
 
5083
    case CONST:
5084
      return make_tree (type, XEXP (x, 0));
5085
 
5086
    case SYMBOL_REF:
5087
      t = SYMBOL_REF_DECL (x);
5088
      if (t)
5089
        return fold_convert (type, build_fold_addr_expr (t));
5090
      /* else fall through.  */
5091
 
5092
    default:
5093
      t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5094
 
5095
      /* If TYPE is a POINTER_TYPE, we might need to convert X from
5096
         address mode to pointer mode.  */
5097
      if (POINTER_TYPE_P (type))
5098
        x = convert_memory_address_addr_space
5099
              (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5100
 
5101
      /* Note that we do *not* use SET_DECL_RTL here, because we do not
5102
         want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5103
      t->decl_with_rtl.rtl = x;
5104
 
5105
      return t;
5106
    }
5107
}
5108
 
5109
/* Compute the logical-and of OP0 and OP1, storing it in TARGET
5110
   and returning TARGET.
5111
 
5112
   If TARGET is 0, a pseudo-register or constant is returned.  */
5113
 
5114
rtx
5115
expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5116
{
5117
  rtx tem = 0;
5118
 
5119
  if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5120
    tem = simplify_binary_operation (AND, mode, op0, op1);
5121
  if (tem == 0)
5122
    tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5123
 
5124
  if (target == 0)
5125
    target = tem;
5126
  else if (tem != target)
5127
    emit_move_insn (target, tem);
5128
  return target;
5129
}
5130
 
5131
/* Helper function for emit_store_flag.  */
5132
static rtx
5133
emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5134
             enum machine_mode mode, enum machine_mode compare_mode,
5135
             int unsignedp, rtx x, rtx y, int normalizep,
5136
             enum machine_mode target_mode)
5137
{
5138
  rtx op0, last, comparison, subtarget, pattern;
5139
  enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5140
 
5141
  last = get_last_insn ();
5142
  x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5143
  y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5144
  comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5145
  if (!x || !y
5146
      || !insn_data[icode].operand[2].predicate
5147
          (x, insn_data[icode].operand[2].mode)
5148
      || !insn_data[icode].operand[3].predicate
5149
          (y, insn_data[icode].operand[3].mode)
5150
      || !insn_data[icode].operand[1].predicate (comparison, VOIDmode))
5151
    {
5152
      delete_insns_since (last);
5153
      return NULL_RTX;
5154
    }
5155
 
5156
  if (target_mode == VOIDmode)
5157
    target_mode = result_mode;
5158
  if (!target)
5159
    target = gen_reg_rtx (target_mode);
5160
 
5161
  if (optimize
5162
      || !(insn_data[(int) icode].operand[0].predicate (target, result_mode)))
5163
    subtarget = gen_reg_rtx (result_mode);
5164
  else
5165
    subtarget = target;
5166
 
5167
  pattern = GEN_FCN (icode) (subtarget, comparison, x, y);
5168
  if (!pattern)
5169
    return NULL_RTX;
5170
  emit_insn (pattern);
5171
 
5172
  /* If we are converting to a wider mode, first convert to
5173
     TARGET_MODE, then normalize.  This produces better combining
5174
     opportunities on machines that have a SIGN_EXTRACT when we are
5175
     testing a single bit.  This mostly benefits the 68k.
5176
 
5177
     If STORE_FLAG_VALUE does not have the sign bit set when
5178
     interpreted in MODE, we can do this conversion as unsigned, which
5179
     is usually more efficient.  */
5180
  if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5181
    {
5182
      convert_move (target, subtarget,
5183
                    (GET_MODE_BITSIZE (result_mode) <= HOST_BITS_PER_WIDE_INT)
5184
                    && 0 == (STORE_FLAG_VALUE
5185
                             & ((HOST_WIDE_INT) 1
5186
                                << (GET_MODE_BITSIZE (result_mode) -1))));
5187
      op0 = target;
5188
      result_mode = target_mode;
5189
    }
5190
  else
5191
    op0 = subtarget;
5192
 
5193
  /* If we want to keep subexpressions around, don't reuse our last
5194
     target.  */
5195
  if (optimize)
5196
    subtarget = 0;
5197
 
5198
  /* Now normalize to the proper value in MODE.  Sometimes we don't
5199
     have to do anything.  */
5200
  if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5201
    ;
5202
  /* STORE_FLAG_VALUE might be the most negative number, so write
5203
     the comparison this way to avoid a compiler-time warning.  */
5204
  else if (- normalizep == STORE_FLAG_VALUE)
5205
    op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5206
 
5207
  /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5208
     it hard to use a value of just the sign bit due to ANSI integer
5209
     constant typing rules.  */
5210
  else if (GET_MODE_BITSIZE (result_mode) <= HOST_BITS_PER_WIDE_INT
5211
           && (STORE_FLAG_VALUE
5212
               & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (result_mode) - 1))))
5213
    op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5214
                        size_int (GET_MODE_BITSIZE (result_mode) - 1), subtarget,
5215
                        normalizep == 1);
5216
  else
5217
    {
5218
      gcc_assert (STORE_FLAG_VALUE & 1);
5219
 
5220
      op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5221
      if (normalizep == -1)
5222
        op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5223
    }
5224
 
5225
  /* If we were converting to a smaller mode, do the conversion now.  */
5226
  if (target_mode != result_mode)
5227
    {
5228
      convert_move (target, op0, 0);
5229
      return target;
5230
    }
5231
  else
5232
    return op0;
5233
}
5234
 
5235
 
5236
/* A subroutine of emit_store_flag only including "tricks" that do not
5237
   need a recursive call.  These are kept separate to avoid infinite
5238
   loops.  */
5239
 
5240
static rtx
5241
emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5242
                   enum machine_mode mode, int unsignedp, int normalizep,
5243
                   enum machine_mode target_mode)
5244
{
5245
  rtx subtarget;
5246
  enum insn_code icode;
5247
  enum machine_mode compare_mode;
5248
  enum mode_class mclass;
5249
  enum rtx_code scode;
5250
  rtx tem;
5251
 
5252
  if (unsignedp)
5253
    code = unsigned_condition (code);
5254
  scode = swap_condition (code);
5255
 
5256
  /* If one operand is constant, make it the second one.  Only do this
5257
     if the other operand is not constant as well.  */
5258
 
5259
  if (swap_commutative_operands_p (op0, op1))
5260
    {
5261
      tem = op0;
5262
      op0 = op1;
5263
      op1 = tem;
5264
      code = swap_condition (code);
5265
    }
5266
 
5267
  if (mode == VOIDmode)
5268
    mode = GET_MODE (op0);
5269
 
5270
  /* For some comparisons with 1 and -1, we can convert this to
5271
     comparisons with zero.  This will often produce more opportunities for
5272
     store-flag insns.  */
5273
 
5274
  switch (code)
5275
    {
5276
    case LT:
5277
      if (op1 == const1_rtx)
5278
        op1 = const0_rtx, code = LE;
5279
      break;
5280
    case LE:
5281
      if (op1 == constm1_rtx)
5282
        op1 = const0_rtx, code = LT;
5283
      break;
5284
    case GE:
5285
      if (op1 == const1_rtx)
5286
        op1 = const0_rtx, code = GT;
5287
      break;
5288
    case GT:
5289
      if (op1 == constm1_rtx)
5290
        op1 = const0_rtx, code = GE;
5291
      break;
5292
    case GEU:
5293
      if (op1 == const1_rtx)
5294
        op1 = const0_rtx, code = NE;
5295
      break;
5296
    case LTU:
5297
      if (op1 == const1_rtx)
5298
        op1 = const0_rtx, code = EQ;
5299
      break;
5300
    default:
5301
      break;
5302
    }
5303
 
5304
  /* If we are comparing a double-word integer with zero or -1, we can
5305
     convert the comparison into one involving a single word.  */
5306
  if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5307
      && GET_MODE_CLASS (mode) == MODE_INT
5308
      && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5309
    {
5310
      if ((code == EQ || code == NE)
5311
          && (op1 == const0_rtx || op1 == constm1_rtx))
5312
        {
5313
          rtx op00, op01;
5314
 
5315
          /* Do a logical OR or AND of the two words and compare the
5316
             result.  */
5317
          op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5318
          op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5319
          tem = expand_binop (word_mode,
5320
                              op1 == const0_rtx ? ior_optab : and_optab,
5321
                              op00, op01, NULL_RTX, unsignedp,
5322
                              OPTAB_DIRECT);
5323
 
5324
          if (tem != 0)
5325
            tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5326
                                   unsignedp, normalizep);
5327
        }
5328
      else if ((code == LT || code == GE) && op1 == const0_rtx)
5329
        {
5330
          rtx op0h;
5331
 
5332
          /* If testing the sign bit, can just test on high word.  */
5333
          op0h = simplify_gen_subreg (word_mode, op0, mode,
5334
                                      subreg_highpart_offset (word_mode,
5335
                                                              mode));
5336
          tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5337
                                 unsignedp, normalizep);
5338
        }
5339
      else
5340
        tem = NULL_RTX;
5341
 
5342
      if (tem)
5343
        {
5344
          if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5345
            return tem;
5346
          if (!target)
5347
            target = gen_reg_rtx (target_mode);
5348
 
5349
          convert_move (target, tem,
5350
 
5351
                              & ((HOST_WIDE_INT) 1
5352
                                 << (GET_MODE_BITSIZE (word_mode) -1))));
5353
          return target;
5354
        }
5355
    }
5356
 
5357
  /* If this is A < 0 or A >= 0, we can do this by taking the ones
5358
     complement of A (for GE) and shifting the sign bit to the low bit.  */
5359
  if (op1 == const0_rtx && (code == LT || code == GE)
5360
      && GET_MODE_CLASS (mode) == MODE_INT
5361
      && (normalizep || STORE_FLAG_VALUE == 1
5362
          || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5363
              && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5364
                  == ((unsigned HOST_WIDE_INT) 1
5365
                      << (GET_MODE_BITSIZE (mode) - 1))))))
5366
    {
5367
      subtarget = target;
5368
 
5369
      if (!target)
5370
        target_mode = mode;
5371
 
5372
      /* If the result is to be wider than OP0, it is best to convert it
5373
         first.  If it is to be narrower, it is *incorrect* to convert it
5374
         first.  */
5375
      else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5376
        {
5377
          op0 = convert_modes (target_mode, mode, op0, 0);
5378
          mode = target_mode;
5379
        }
5380
 
5381
      if (target_mode != mode)
5382
        subtarget = 0;
5383
 
5384
      if (code == GE)
5385
        op0 = expand_unop (mode, one_cmpl_optab, op0,
5386
                           ((STORE_FLAG_VALUE == 1 || normalizep)
5387
                            ? 0 : subtarget), 0);
5388
 
5389
      if (STORE_FLAG_VALUE == 1 || normalizep)
5390
        /* If we are supposed to produce a 0/1 value, we want to do
5391
           a logical shift from the sign bit to the low-order bit; for
5392
           a -1/0 value, we do an arithmetic shift.  */
5393
        op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5394
                            size_int (GET_MODE_BITSIZE (mode) - 1),
5395
                            subtarget, normalizep != -1);
5396
 
5397
      if (mode != target_mode)
5398
        op0 = convert_modes (target_mode, mode, op0, 0);
5399
 
5400
      return op0;
5401
    }
5402
 
5403
  mclass = GET_MODE_CLASS (mode);
5404
  for (compare_mode = mode; compare_mode != VOIDmode;
5405
       compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5406
    {
5407
     enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5408
     icode = optab_handler (cstore_optab, optab_mode)->insn_code;
5409
     if (icode != CODE_FOR_nothing)
5410
        {
5411
          do_pending_stack_adjust ();
5412
          tem = emit_cstore (target, icode, code, mode, compare_mode,
5413
                             unsignedp, op0, op1, normalizep, target_mode);
5414
          if (tem)
5415
            return tem;
5416
 
5417
          if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5418
            {
5419
              tem = emit_cstore (target, icode, scode, mode, compare_mode,
5420
                                 unsignedp, op1, op0, normalizep, target_mode);
5421
              if (tem)
5422
                return tem;
5423
            }
5424
          break;
5425
        }
5426
    }
5427
 
5428
  return 0;
5429
}
5430
 
5431
/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5432
   and storing in TARGET.  Normally return TARGET.
5433
   Return 0 if that cannot be done.
5434
 
5435
   MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5436
   it is VOIDmode, they cannot both be CONST_INT.
5437
 
5438
   UNSIGNEDP is for the case where we have to widen the operands
5439
   to perform the operation.  It says to use zero-extension.
5440
 
5441
   NORMALIZEP is 1 if we should convert the result to be either zero
5442
   or one.  Normalize is -1 if we should convert the result to be
5443
   either zero or -1.  If NORMALIZEP is zero, the result will be left
5444
   "raw" out of the scc insn.  */
5445
 
5446
rtx
5447
emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5448
                 enum machine_mode mode, int unsignedp, int normalizep)
5449
{
5450
  enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5451
  enum rtx_code rcode;
5452
  rtx subtarget;
5453
  rtx tem, last, trueval;
5454
 
5455
  tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5456
                           target_mode);
5457
  if (tem)
5458
    return tem;
5459
 
5460
  /* If we reached here, we can't do this with a scc insn, however there
5461
     are some comparisons that can be done in other ways.  Don't do any
5462
     of these cases if branches are very cheap.  */
5463
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5464
    return 0;
5465
 
5466
  /* See what we need to return.  We can only return a 1, -1, or the
5467
     sign bit.  */
5468
 
5469
  if (normalizep == 0)
5470
    {
5471
      if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5472
        normalizep = STORE_FLAG_VALUE;
5473
 
5474
      else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5475
               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5476
                   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5477
        ;
5478
      else
5479
        return 0;
5480
    }
5481
 
5482
  last = get_last_insn ();
5483
 
5484
  /* If optimizing, use different pseudo registers for each insn, instead
5485
     of reusing the same pseudo.  This leads to better CSE, but slows
5486
     down the compiler, since there are more pseudos */
5487
  subtarget = (!optimize
5488
               && (target_mode == mode)) ? target : NULL_RTX;
5489
  trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5490
 
5491
  /* For floating-point comparisons, try the reverse comparison or try
5492
     changing the "orderedness" of the comparison.  */
5493
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5494
    {
5495
      enum rtx_code first_code;
5496
      bool and_them;
5497
 
5498
      rcode = reverse_condition_maybe_unordered (code);
5499
      if (can_compare_p (rcode, mode, ccp_store_flag)
5500
          && (code == ORDERED || code == UNORDERED
5501
              || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5502
              || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5503
        {
5504
          int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5505
                          || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5506
 
5507
          /* For the reverse comparison, use either an addition or a XOR.  */
5508
          if (want_add
5509
              && rtx_cost (GEN_INT (normalizep), PLUS,
5510
                           optimize_insn_for_speed_p ()) == 0)
5511
            {
5512
              tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5513
                                       STORE_FLAG_VALUE, target_mode);
5514
              if (tem)
5515
                return expand_binop (target_mode, add_optab, tem,
5516
                                     GEN_INT (normalizep),
5517
                                     target, 0, OPTAB_WIDEN);
5518
            }
5519
          else if (!want_add
5520
                   && rtx_cost (trueval, XOR,
5521
                                optimize_insn_for_speed_p ()) == 0)
5522
            {
5523
              tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5524
                                       normalizep, target_mode);
5525
              if (tem)
5526
                return expand_binop (target_mode, xor_optab, tem, trueval,
5527
                                     target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5528
            }
5529
        }
5530
 
5531
      delete_insns_since (last);
5532
 
5533
      /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5534
      if (code == ORDERED || code == UNORDERED)
5535
        return 0;
5536
 
5537
      and_them = split_comparison (code, mode, &first_code, &code);
5538
 
5539
      /* If there are no NaNs, the first comparison should always fall through.
5540
         Effectively change the comparison to the other one.  */
5541
      if (!HONOR_NANS (mode))
5542
        {
5543
          gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5544
          return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5545
                                    target_mode);
5546
        }
5547
 
5548
#ifdef HAVE_conditional_move
5549
      /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5550
         conditional move.  */
5551
      tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5552
                               normalizep, target_mode);
5553
      if (tem == 0)
5554
        return 0;
5555
 
5556
      if (and_them)
5557
        tem = emit_conditional_move (target, code, op0, op1, mode,
5558
                                     tem, const0_rtx, GET_MODE (tem), 0);
5559
      else
5560
        tem = emit_conditional_move (target, code, op0, op1, mode,
5561
                                     trueval, tem, GET_MODE (tem), 0);
5562
 
5563
      if (tem == 0)
5564
        delete_insns_since (last);
5565
      return tem;
5566
#else
5567
      return 0;
5568
#endif
5569
    }
5570
 
5571
  /* The remaining tricks only apply to integer comparisons.  */
5572
 
5573
  if (GET_MODE_CLASS (mode) != MODE_INT)
5574
    return 0;
5575
 
5576
  /* If this is an equality comparison of integers, we can try to exclusive-or
5577
     (or subtract) the two operands and use a recursive call to try the
5578
     comparison with zero.  Don't do any of these cases if branches are
5579
     very cheap.  */
5580
 
5581
  if ((code == EQ || code == NE) && op1 != const0_rtx)
5582
    {
5583
      tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5584
                          OPTAB_WIDEN);
5585
 
5586
      if (tem == 0)
5587
        tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5588
                            OPTAB_WIDEN);
5589
      if (tem != 0)
5590
        tem = emit_store_flag (target, code, tem, const0_rtx,
5591
                               mode, unsignedp, normalizep);
5592
      if (tem != 0)
5593
        return tem;
5594
 
5595
      delete_insns_since (last);
5596
    }
5597
 
5598
  /* For integer comparisons, try the reverse comparison.  However, for
5599
     small X and if we'd have anyway to extend, implementing "X != 0"
5600
     as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5601
  rcode = reverse_condition (code);
5602
  if (can_compare_p (rcode, mode, ccp_store_flag)
5603
      && ! (optab_handler (cstore_optab, mode)->insn_code == CODE_FOR_nothing
5604
            && code == NE
5605
            && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5606
            && op1 == const0_rtx))
5607
    {
5608
      int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5609
                      || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5610
 
5611
      /* Again, for the reverse comparison, use either an addition or a XOR.  */
5612
      if (want_add
5613
          && rtx_cost (GEN_INT (normalizep), PLUS,
5614
                       optimize_insn_for_speed_p ()) == 0)
5615
        {
5616
          tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5617
                                   STORE_FLAG_VALUE, target_mode);
5618
          if (tem != 0)
5619
            tem = expand_binop (target_mode, add_optab, tem,
5620
                                GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5621
        }
5622
      else if (!want_add
5623
               && rtx_cost (trueval, XOR,
5624
                            optimize_insn_for_speed_p ()) == 0)
5625
        {
5626
          tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5627
                                   normalizep, target_mode);
5628
          if (tem != 0)
5629
            tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5630
                                INTVAL (trueval) >= 0, OPTAB_WIDEN);
5631
        }
5632
 
5633
      if (tem != 0)
5634
        return tem;
5635
      delete_insns_since (last);
5636
    }
5637
 
5638
  /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5639
     the constant zero.  Reject all other comparisons at this point.  Only
5640
     do LE and GT if branches are expensive since they are expensive on
5641
     2-operand machines.  */
5642
 
5643
  if (op1 != const0_rtx
5644
      || (code != EQ && code != NE
5645
          && (BRANCH_COST (optimize_insn_for_speed_p (),
5646
                           false) <= 1 || (code != LE && code != GT))))
5647
    return 0;
5648
 
5649
  /* Try to put the result of the comparison in the sign bit.  Assume we can't
5650
     do the necessary operation below.  */
5651
 
5652
  tem = 0;
5653
 
5654
  /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5655
     the sign bit set.  */
5656
 
5657
  if (code == LE)
5658
    {
5659
      /* This is destructive, so SUBTARGET can't be OP0.  */
5660
      if (rtx_equal_p (subtarget, op0))
5661
        subtarget = 0;
5662
 
5663
      tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5664
                          OPTAB_WIDEN);
5665
      if (tem)
5666
        tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5667
                            OPTAB_WIDEN);
5668
    }
5669
 
5670
  /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5671
     number of bits in the mode of OP0, minus one.  */
5672
 
5673
  if (code == GT)
5674
    {
5675
      if (rtx_equal_p (subtarget, op0))
5676
        subtarget = 0;
5677
 
5678
      tem = expand_shift (RSHIFT_EXPR, mode, op0,
5679
                          size_int (GET_MODE_BITSIZE (mode) - 1),
5680
                          subtarget, 0);
5681
      tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5682
                          OPTAB_WIDEN);
5683
    }
5684
 
5685
  if (code == EQ || code == NE)
5686
    {
5687
      /* For EQ or NE, one way to do the comparison is to apply an operation
5688
         that converts the operand into a positive number if it is nonzero
5689
         or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5690
         for NE we negate.  This puts the result in the sign bit.  Then we
5691
         normalize with a shift, if needed.
5692
 
5693
         Two operations that can do the above actions are ABS and FFS, so try
5694
         them.  If that doesn't work, and MODE is smaller than a full word,
5695
         we can use zero-extension to the wider mode (an unsigned conversion)
5696
         as the operation.  */
5697
 
5698
      /* Note that ABS doesn't yield a positive number for INT_MIN, but
5699
         that is compensated by the subsequent overflow when subtracting
5700
         one / negating.  */
5701
 
5702
      if (optab_handler (abs_optab, mode)->insn_code != CODE_FOR_nothing)
5703
        tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5704
      else if (optab_handler (ffs_optab, mode)->insn_code != CODE_FOR_nothing)
5705
        tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5706
      else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5707
        {
5708
          tem = convert_modes (word_mode, mode, op0, 1);
5709
          mode = word_mode;
5710
        }
5711
 
5712
      if (tem != 0)
5713
        {
5714
          if (code == EQ)
5715
            tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5716
                                0, OPTAB_WIDEN);
5717
          else
5718
            tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5719
        }
5720
 
5721
      /* If we couldn't do it that way, for NE we can "or" the two's complement
5722
         of the value with itself.  For EQ, we take the one's complement of
5723
         that "or", which is an extra insn, so we only handle EQ if branches
5724
         are expensive.  */
5725
 
5726
      if (tem == 0
5727
          && (code == NE
5728
              || BRANCH_COST (optimize_insn_for_speed_p (),
5729
                              false) > 1))
5730
        {
5731
          if (rtx_equal_p (subtarget, op0))
5732
            subtarget = 0;
5733
 
5734
          tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5735
          tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5736
                              OPTAB_WIDEN);
5737
 
5738
          if (tem && code == EQ)
5739
            tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5740
        }
5741
    }
5742
 
5743
  if (tem && normalizep)
5744
    tem = expand_shift (RSHIFT_EXPR, mode, tem,
5745
                        size_int (GET_MODE_BITSIZE (mode) - 1),
5746
                        subtarget, normalizep == 1);
5747
 
5748
  if (tem)
5749
    {
5750
      if (!target)
5751
        ;
5752
      else if (GET_MODE (tem) != target_mode)
5753
        {
5754
          convert_move (target, tem, 0);
5755
          tem = target;
5756
        }
5757
      else if (!subtarget)
5758
        {
5759
          emit_move_insn (target, tem);
5760
          tem = target;
5761
        }
5762
    }
5763
  else
5764
    delete_insns_since (last);
5765
 
5766
  return tem;
5767
}
5768
 
5769
/* Like emit_store_flag, but always succeeds.  */
5770
 
5771
rtx
5772
emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5773
                       enum machine_mode mode, int unsignedp, int normalizep)
5774
{
5775
  rtx tem, label;
5776
  rtx trueval, falseval;
5777
 
5778
  /* First see if emit_store_flag can do the job.  */
5779
  tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5780
  if (tem != 0)
5781
    return tem;
5782
 
5783
  if (!target)
5784
    target = gen_reg_rtx (word_mode);
5785
 
5786
  /* If this failed, we have to do this with set/compare/jump/set code.
5787
     For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5788
  trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5789
  if (code == NE
5790
      && GET_MODE_CLASS (mode) == MODE_INT
5791
      && REG_P (target)
5792
      && op0 == target
5793
      && op1 == const0_rtx)
5794
    {
5795
      label = gen_label_rtx ();
5796
      do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5797
                               mode, NULL_RTX, NULL_RTX, label, -1);
5798
      emit_move_insn (target, trueval);
5799
      emit_label (label);
5800
      return target;
5801
    }
5802
 
5803
  if (!REG_P (target)
5804
      || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5805
    target = gen_reg_rtx (GET_MODE (target));
5806
 
5807
  /* Jump in the right direction if the target cannot implement CODE
5808
     but can jump on its reverse condition.  */
5809
  falseval = const0_rtx;
5810
  if (! can_compare_p (code, mode, ccp_jump)
5811
      && (! FLOAT_MODE_P (mode)
5812
          || code == ORDERED || code == UNORDERED
5813
          || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5814
          || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5815
    {
5816
      enum rtx_code rcode;
5817
      if (FLOAT_MODE_P (mode))
5818
        rcode = reverse_condition_maybe_unordered (code);
5819
      else
5820
        rcode = reverse_condition (code);
5821
 
5822
      /* Canonicalize to UNORDERED for the libcall.  */
5823
      if (can_compare_p (rcode, mode, ccp_jump)
5824
          || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5825
        {
5826
          falseval = trueval;
5827
          trueval = const0_rtx;
5828
          code = rcode;
5829
        }
5830
    }
5831
 
5832
  emit_move_insn (target, trueval);
5833
  label = gen_label_rtx ();
5834
  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5835
                           NULL_RTX, label, -1);
5836
 
5837
  emit_move_insn (target, falseval);
5838
  emit_label (label);
5839
 
5840
  return target;
5841
}
5842
 
5843
/* Perform possibly multi-word comparison and conditional jump to LABEL
5844
   if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5845
   now a thin wrapper around do_compare_rtx_and_jump.  */
5846
 
5847
static void
5848
do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5849
                 rtx label)
5850
{
5851
  int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5852
  do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5853
                           NULL_RTX, NULL_RTX, label, -1);
5854
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.