OpenCores Subversion repository "openrisc"
https://opencores.org/ocsvn/openrisc/openrisc/trunk
File: openrisc/trunk/gnu-dev/or1k-gcc/gcc/expmed.c (rev 684, jeremybenn)

/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
   2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */


#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "diagnostic-core.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "flags.h"
#include "insn-config.h"
#include "expr.h"
#include "optabs.h"
#include "recog.h"
#include "langhooks.h"
#include "df.h"
#include "target.h"
#include "expmed.h"

struct target_expmed default_target_expmed;
#if SWITCHABLE_TARGET
struct target_expmed *this_target_expmed = &default_target_expmed;
#endif

static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   rtx);
static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   rtx);
static rtx extract_fixed_bit_field (enum machine_mode, rtx,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx mask_rtx (enum machine_mode, int, int, int);
static rtx lshift_value (enum machine_mode, rtx, int, int);
static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, int);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);

/* Test whether a value is zero or a power of two.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
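/* For instance, x == 8 (binary 1000): 1000 & 0111 == 0, so it qualifies;
   x == 6 (binary 0110): 0110 & 0101 == 0100 != 0, so it does not.  */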

#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
#endif
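/* In the absence of a target-specific definition, unaligned accesses are
   assumed slow exactly when the target requires strict alignment.  */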


/* Reduce conditional compilation elsewhere.  */
#ifndef HAVE_insv
#define HAVE_insv       0
#define CODE_FOR_insv   CODE_FOR_nothing
#define gen_insv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extv
#define HAVE_extv       0
#define CODE_FOR_extv   CODE_FOR_nothing
#define gen_extv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extzv
#define HAVE_extzv      0
#define CODE_FOR_extzv  CODE_FOR_nothing
#define gen_extzv(a,b,c,d) NULL_RTX
#endif

void
init_expmed (void)
{
  struct
  {
    struct rtx_def reg;         rtunion reg_fld[2];
    struct rtx_def plus;        rtunion plus_fld1;
    struct rtx_def neg;
    struct rtx_def mult;        rtunion mult_fld1;
    struct rtx_def sdiv;        rtunion sdiv_fld1;
    struct rtx_def udiv;        rtunion udiv_fld1;
    struct rtx_def zext;
    struct rtx_def sdiv_32;     rtunion sdiv_32_fld1;
    struct rtx_def smod_32;     rtunion smod_32_fld1;
    struct rtx_def wide_mult;   rtunion wide_mult_fld1;
    struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
    struct rtx_def wide_trunc;
    struct rtx_def shift;       rtunion shift_fld1;
    struct rtx_def shift_mult;  rtunion shift_mult_fld1;
    struct rtx_def shift_add;   rtunion shift_add_fld1;
    struct rtx_def shift_sub0;  rtunion shift_sub0_fld1;
    struct rtx_def shift_sub1;  rtunion shift_sub1_fld1;
  } all;
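
  /* Each rtx_def above is immediately followed by the rtunion slots its
     operands need, so these skeletons can be wired together with XEXP,
     re-moded with PUT_MODE on each loop iteration below, and handed to
     set_src_cost without allocating fresh RTL for every query.  */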

  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
  int m, n;
  enum machine_mode mode, wider_mode;
  int speed;


  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
      cint[m] = GEN_INT (m);
    }
  memset (&all, 0, sizeof all);

  PUT_CODE (&all.reg, REG);
  /* Avoid using hard regs in ways which may be unsupported.  */
  SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);

  PUT_CODE (&all.plus, PLUS);
  XEXP (&all.plus, 0) = &all.reg;
  XEXP (&all.plus, 1) = &all.reg;

  PUT_CODE (&all.neg, NEG);
  XEXP (&all.neg, 0) = &all.reg;

  PUT_CODE (&all.mult, MULT);
  XEXP (&all.mult, 0) = &all.reg;
  XEXP (&all.mult, 1) = &all.reg;

  PUT_CODE (&all.sdiv, DIV);
  XEXP (&all.sdiv, 0) = &all.reg;
  XEXP (&all.sdiv, 1) = &all.reg;

  PUT_CODE (&all.udiv, UDIV);
  XEXP (&all.udiv, 0) = &all.reg;
  XEXP (&all.udiv, 1) = &all.reg;

  PUT_CODE (&all.sdiv_32, DIV);
  XEXP (&all.sdiv_32, 0) = &all.reg;
  XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);

  PUT_CODE (&all.smod_32, MOD);
  XEXP (&all.smod_32, 0) = &all.reg;
  XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);

  PUT_CODE (&all.zext, ZERO_EXTEND);
  XEXP (&all.zext, 0) = &all.reg;

  PUT_CODE (&all.wide_mult, MULT);
  XEXP (&all.wide_mult, 0) = &all.zext;
  XEXP (&all.wide_mult, 1) = &all.zext;

  PUT_CODE (&all.wide_lshr, LSHIFTRT);
  XEXP (&all.wide_lshr, 0) = &all.wide_mult;

  PUT_CODE (&all.wide_trunc, TRUNCATE);
  XEXP (&all.wide_trunc, 0) = &all.wide_lshr;

  PUT_CODE (&all.shift, ASHIFT);
  XEXP (&all.shift, 0) = &all.reg;

  PUT_CODE (&all.shift_mult, MULT);
  XEXP (&all.shift_mult, 0) = &all.reg;

  PUT_CODE (&all.shift_add, PLUS);
  XEXP (&all.shift_add, 0) = &all.shift_mult;
  XEXP (&all.shift_add, 1) = &all.reg;

  PUT_CODE (&all.shift_sub0, MINUS);
  XEXP (&all.shift_sub0, 0) = &all.shift_mult;
  XEXP (&all.shift_sub0, 1) = &all.reg;

  PUT_CODE (&all.shift_sub1, MINUS);
  XEXP (&all.shift_sub1, 0) = &all.reg;
  XEXP (&all.shift_sub1, 1) = &all.shift_mult;

  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      zero_cost[speed] = set_src_cost (const0_rtx, speed);

      for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
           mode != VOIDmode;
           mode = GET_MODE_WIDER_MODE (mode))
        {
          PUT_MODE (&all.reg, mode);
          PUT_MODE (&all.plus, mode);
          PUT_MODE (&all.neg, mode);
          PUT_MODE (&all.mult, mode);
          PUT_MODE (&all.sdiv, mode);
          PUT_MODE (&all.udiv, mode);
          PUT_MODE (&all.sdiv_32, mode);
          PUT_MODE (&all.smod_32, mode);
          PUT_MODE (&all.wide_trunc, mode);
          PUT_MODE (&all.shift, mode);
          PUT_MODE (&all.shift_mult, mode);
          PUT_MODE (&all.shift_add, mode);
          PUT_MODE (&all.shift_sub0, mode);
          PUT_MODE (&all.shift_sub1, mode);

          add_cost[speed][mode] = set_src_cost (&all.plus, speed);
          neg_cost[speed][mode] = set_src_cost (&all.neg, speed);
          mul_cost[speed][mode] = set_src_cost (&all.mult, speed);
          sdiv_cost[speed][mode] = set_src_cost (&all.sdiv, speed);
          udiv_cost[speed][mode] = set_src_cost (&all.udiv, speed);

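          /* Flag whether a real division or modulus by a power of two
             costs no more than a few additions; later expansions consult
             these flags before resorting to shift-based sequences.  */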
          sdiv_pow2_cheap[speed][mode] = (set_src_cost (&all.sdiv_32, speed)
                                          <= 2 * add_cost[speed][mode]);
          smod_pow2_cheap[speed][mode] = (set_src_cost (&all.smod_32, speed)
                                          <= 4 * add_cost[speed][mode]);

          wider_mode = GET_MODE_WIDER_MODE (mode);
          if (wider_mode != VOIDmode)
            {
              PUT_MODE (&all.zext, wider_mode);
              PUT_MODE (&all.wide_mult, wider_mode);
              PUT_MODE (&all.wide_lshr, wider_mode);
              XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));

              mul_widen_cost[speed][wider_mode]
                = set_src_cost (&all.wide_mult, speed);
              mul_highpart_cost[speed][mode]
                = set_src_cost (&all.wide_trunc, speed);
            }

          shift_cost[speed][mode][0] = 0;
          shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
            = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];

          n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
          for (m = 1; m < n; m++)
            {
              XEXP (&all.shift, 1) = cint[m];
              XEXP (&all.shift_mult, 1) = pow2[m];

              shift_cost[speed][mode][m] = set_src_cost (&all.shift, speed);
              shiftadd_cost[speed][mode][m] = set_src_cost (&all.shift_add,
                                                            speed);
              shiftsub0_cost[speed][mode][m] = set_src_cost (&all.shift_sub0,
                                                             speed);
              shiftsub1_cost[speed][mode][m] = set_src_cost (&all.shift_sub1,
                                                             speed);
            }
        }
    }
  if (alg_hash_used_p)
    memset (alg_hash, 0, sizeof (alg_hash));
  else
    alg_hash_used_p = true;
  default_rtl_profile ();
}

/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (enum machine_mode mode, rtx x)
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}

/* Report on the availability of insv/extv/extzv and the desired mode
   of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
   is false; else the mode of the specified operand.  If OPNO is -1,
   all the caller cares about is whether the insn is available.  */
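/* For example, mode_for_extraction (EP_insv, 3) yields the mode the
   target's insv pattern wants for its value operand (operand 3), so a
   value can be converted to that mode before being inserted.  */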
enum machine_mode
mode_for_extraction (enum extraction_pattern pattern, int opno)
{
  const struct insn_data_d *data;

  switch (pattern)
    {
    case EP_insv:
      if (HAVE_insv)
        {
          data = &insn_data[CODE_FOR_insv];
          break;
        }
      return MAX_MACHINE_MODE;

    case EP_extv:
      if (HAVE_extv)
        {
          data = &insn_data[CODE_FOR_extv];
          break;
        }
      return MAX_MACHINE_MODE;

    case EP_extzv:
      if (HAVE_extzv)
        {
          data = &insn_data[CODE_FOR_extzv];
          break;
        }
      return MAX_MACHINE_MODE;

    default:
      gcc_unreachable ();
    }

  if (opno == -1)
    return VOIDmode;

  /* Everyone who uses this function used to follow it with
     if (result == VOIDmode) result = word_mode; */
  if (data->operand[opno].mode == VOIDmode)
    return word_mode;
  return data->operand[opno].mode;
}

/* A subroutine of store_bit_field, with the same arguments.  Return true
   if the operation could be implemented.

   If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
   no other way of implementing the operation.  If FALLBACK_P is false,
   return false instead.  */

static bool
store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                   unsigned HOST_WIDE_INT bitnum,
                   unsigned HOST_WIDE_INT bitregion_start,
                   unsigned HOST_WIDE_INT bitregion_end,
                   enum machine_mode fieldmode,
                   rtx value, bool fallback_p)
{
  unsigned int unit
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset, bitpos;
  rtx op0 = str_rtx;
  int byte_offset;
  rtx orig_value;

  enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);

  while (GET_CODE (op0) == SUBREG)
    {
      /* The following line once was done only if WORDS_BIG_ENDIAN,
         but I think that is a mistake.  WORDS_BIG_ENDIAN is
         meaningful at a much higher level; when structures are copied
         between memory and regs, the higher-numbered regs
         always get higher addresses.  */
      int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
      int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));

      byte_offset = 0;

      /* Paradoxical subregs need special handling on big endian machines.  */
      if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
        {
          int difference = inner_mode_size - outer_mode_size;

          if (WORDS_BIG_ENDIAN)
            byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
          if (BYTES_BIG_ENDIAN)
            byte_offset += difference % UNITS_PER_WORD;
        }
      else
        byte_offset = SUBREG_BYTE (op0);

      bitnum += byte_offset * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* No action is needed if the target is a register and if the field
     lies completely outside that register.  This can occur if the source
     code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return true;

  /* Use vec_set patterns for inserting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
      && fieldmode == GET_MODE_INNER (GET_MODE (op0))
      && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
      && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      struct expand_operand ops[3];
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      enum insn_code icode = optab_handler (vec_set_optab, outermode);
      int pos = bitnum / GET_MODE_BITSIZE (innermode);

      create_fixed_operand (&ops[0], op0);
      create_input_operand (&ops[1], value, innermode);
      create_integer_operand (&ops[2], pos);
      if (maybe_expand_insn (icode, 3, ops))
        return true;
    }

  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.

     If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */

  offset = bitnum / unit;
  bitpos = bitnum % unit;
  byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
                + (offset * UNITS_PER_WORD);

  if (bitpos == 0
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (!MEM_P (op0)
          ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
             && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
                 || validate_subreg (fieldmode, GET_MODE (op0), op0,
                                     byte_offset)))
          : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
             || (offset * BITS_PER_UNIT % bitsize == 0
                 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
    {
      if (MEM_P (op0))
        op0 = adjust_address (op0, fieldmode, offset);
      else if (GET_MODE (op0) != fieldmode)
        op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
                                   byte_offset);
      emit_move_insn (op0, value);
      return true;
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
        if (MEM_P (op0))
          op0 = adjust_address (op0, imode, 0);
        else
          {
            gcc_assert (imode != BLKmode);
            op0 = gen_lowpart (imode, op0);
          }
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  /* ?? not always for C++0x memory model ?? */
  if (MEM_P (op0))
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && !MEM_P (op0)
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* Storing an lsb-aligned field in a register
     can be done with a movestrict instruction.  */

  if (!MEM_P (op0)
      && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
    {
      struct expand_operand ops[2];
      enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
      rtx arg0 = op0;
      unsigned HOST_WIDE_INT subreg_off;

      if (GET_CODE (arg0) == SUBREG)
        {
          /* Else we've got some float mode source being extracted into
             a different float mode destination -- this combination of
             subregs results in Severe Tire Damage.  */
          gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
                      || GET_MODE_CLASS (fieldmode) == MODE_INT
                      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
          arg0 = SUBREG_REG (arg0);
        }

      subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
                   + (offset * UNITS_PER_WORD);
      if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
        {
          arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);

          create_fixed_operand (&ops[0], arg0);
          /* Shrink the source operand to FIELDMODE.  */
          create_convert_operand_to (&ops[1], value, fieldmode, false);
          if (maybe_expand_insn (icode, 2, ops))
            return true;
        }
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
         in the order least significant first.
         This is because the most significant word is the one which may
         be less than full.
         However, only do that if the value is not BLKmode.  */
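      /* E.g. a 72-bit field on a 32-bit target takes three word-sized
         stores; the most significant piece covers only the remaining
         8 bits.  */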

      unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;
      rtx last;

      /* This is the mode we must force value to, so that there will be enough
         subwords to extract.  Note that fieldmode will often (always?) be
         VOIDmode, because that is what store_field uses to indicate that this
         is a bit field, but passing VOIDmode to operand_subword_force
         is not allowed.  */
      fieldmode = GET_MODE (value);
      if (fieldmode == VOIDmode)
        fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);

      last = get_last_insn ();
      for (i = 0; i < nwords; i++)
        {
          /* If I is 0, use the low-order word in both field and target;
             if I is 1, use the next to lowest word; and so on.  */
          unsigned int wordnum = (backwards ? nwords - i - 1 : i);
          unsigned int bit_offset = (backwards
                                     ? MAX ((int) bitsize - ((int) i + 1)
                                            * BITS_PER_WORD,
                                            0)
                                     : (int) i * BITS_PER_WORD);
          rtx value_word = operand_subword_force (value, wordnum, fieldmode);
          unsigned HOST_WIDE_INT new_bitsize =
            MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);

          /* If the remaining chunk doesn't have full wordsize we have
             to make sure that for big endian machines the higher order
             bits are used.  */
          if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
            value_word = simplify_expand_binop (word_mode, lshr_optab,
                                                value_word,
                                                GEN_INT (BITS_PER_WORD
                                                         - new_bitsize),
                                                NULL_RTX, true,
                                                OPTAB_LIB_WIDEN);

          if (!store_bit_field_1 (op0, new_bitsize,
                                  bitnum + bit_offset,
                                  bitregion_start, bitregion_end,
                                  word_mode,
                                  value_word, fallback_p))
            {
              delete_insns_since (last);
              return false;
            }
        }
      return true;
    }

  /* From here on we can assume that the field to be stored in is
     a full-word (whatever type that is), since it is shorter than a word.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (!MEM_P (op0))
    {
      if (offset != 0
          || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
        {
          if (!REG_P (op0))
            {
              /* Since this is a destination (lvalue), we can't copy
                 it to a pseudo.  We can remove a SUBREG that does not
                 change the size of the operand.  Such a SUBREG may
                 have been added above.  */
              gcc_assert (GET_CODE (op0) == SUBREG
                          && (GET_MODE_SIZE (GET_MODE (op0))
                              == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
              op0 = SUBREG_REG (op0);
            }
          op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
                                op0, (offset * UNITS_PER_WORD));
        }
      offset = 0;
    }

  /* If VALUE has a floating-point or complex mode, access it as an
     integer of the corresponding size.  This can occur on a machine
     with 64 bit registers that uses SFmode for float.  It can also
     occur for unaligned float or complex fields.  */
  orig_value = value;
  if (GET_MODE (value) != VOIDmode
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
    {
      value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
      emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
    }

  /* Now OFFSET is nonzero only if OP0 is memory
     and is therefore always measured in bytes.  */

  if (HAVE_insv
      && GET_MODE (value) != BLKmode
      && bitsize > 0
      && GET_MODE_BITSIZE (op_mode) >= bitsize
      /* Do not use insv for volatile bitfields when
         -fstrict-volatile-bitfields is in effect.  */
      && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
           && flag_strict_volatile_bitfields > 0)
      && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
            && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
    {
      struct expand_operand ops[4];
      int xbitpos = bitpos;
      rtx value1;
      rtx xop0 = op0;
      rtx last = get_last_insn ();
      bool copy_back = false;

      /* Add OFFSET into OP0's address.  */
      if (MEM_P (xop0))
        xop0 = adjust_address (xop0, byte_mode, offset);

      /* If xop0 is a register, we need it in OP_MODE
         to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
        /* We can't just change the mode, because this might clobber op0,
           and we will need the original value of op0 if insv fails.  */
        xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
        xop0 = gen_lowpart_SUBREG (op_mode, xop0);

      /* If the destination is a paradoxical subreg such that we need a
         truncate to the inner mode, perform the insertion on a temporary and
         truncate the result to the original destination.  Note that we can't
         just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
         X) 0)) is (reg:N X).  */
      if (GET_CODE (xop0) == SUBREG
          && REG_P (SUBREG_REG (xop0))
          && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
                                              op_mode)))
        {
          rtx tem = gen_reg_rtx (op_mode);
          emit_move_insn (tem, xop0);
          xop0 = tem;
          copy_back = true;
        }

      /* We have been counting XBITPOS within UNIT.
         Count instead within the size of the register.  */
      if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
        xbitpos += GET_MODE_BITSIZE (op_mode) - unit;

      unit = GET_MODE_BITSIZE (op_mode);

      /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
         "backwards" from the size of the unit we are inserting into.
         Otherwise, we count bits from the most significant on a
         BYTES/BITS_BIG_ENDIAN machine.  */

      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
        xbitpos = unit - bitsize - xbitpos;

      /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
      value1 = value;
      if (GET_MODE (value) != op_mode)
        {
          if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
            {
              /* Optimization: Don't bother really extending VALUE
                 if it has all the bits we will actually use.  However,
                 if we must narrow it, be sure we do it correctly.  */

              if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
                {
                  rtx tmp;

                  tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
                  if (! tmp)
                    tmp = simplify_gen_subreg (op_mode,
                                               force_reg (GET_MODE (value),
                                                          value1),
                                               GET_MODE (value), 0);
                  value1 = tmp;
                }
              else
                value1 = gen_lowpart (op_mode, value1);
            }
          else if (CONST_INT_P (value))
            value1 = gen_int_mode (INTVAL (value), op_mode);
          else
            /* Parse phase is supposed to make VALUE's data type
               match that of the component reference, which is a type
               at least as wide as the field; so VALUE should have
               a mode that corresponds to that type.  */
            gcc_assert (CONSTANT_P (value));
        }

      create_fixed_operand (&ops[0], xop0);
      create_integer_operand (&ops[1], bitsize);
      create_integer_operand (&ops[2], xbitpos);
      create_input_operand (&ops[3], value1, op_mode);
      if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
        {
          if (copy_back)
            convert_move (op0, xop0, true);
          return true;
        }
      delete_insns_since (last);
    }

  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (HAVE_insv && MEM_P (op0))
    {
      enum machine_mode bestmode;
      unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;

      if (bitregion_end)
        maxbits = bitregion_end - bitregion_start + 1;

      /* Get the mode to use for inserting into this field.  If OP0 is
         BLKmode, get the smallest mode consistent with the alignment. If
         OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
         mode. Otherwise, use the smallest mode containing the field.  */

      if (GET_MODE (op0) == BLKmode
          || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
          || (op_mode != MAX_MACHINE_MODE
              && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
        bestmode = get_best_mode (bitsize, bitnum,
                                  bitregion_start, bitregion_end,
                                  MEM_ALIGN (op0),
                                  (op_mode == MAX_MACHINE_MODE
                                   ? VOIDmode : op_mode),
                                  MEM_VOLATILE_P (op0));
      else
        bestmode = GET_MODE (op0);

      if (bestmode != VOIDmode
          && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
          && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
               && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
        {
          rtx last, tempreg, xop0;
          unsigned HOST_WIDE_INT xoffset, xbitpos;

          last = get_last_insn ();

          /* Adjust address to point to the containing unit of
             that mode.  Compute the offset as a multiple of this unit,
             counting in bytes.  */
          unit = GET_MODE_BITSIZE (bestmode);
          xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
          xbitpos = bitnum % unit;
          xop0 = adjust_address (op0, bestmode, xoffset);

          /* Fetch that unit, store the bitfield in it, then store
             the unit.  */
          tempreg = copy_to_reg (xop0);
          if (store_bit_field_1 (tempreg, bitsize, xbitpos,
                                 bitregion_start, bitregion_end,
                                 fieldmode, orig_value, false))
            {
              emit_move_insn (xop0, tempreg);
              return true;
            }
          delete_insns_since (last);
        }
    }

  if (!fallback_p)
    return false;

  store_fixed_bit_field (op0, offset, bitsize, bitpos,
                         bitregion_start, bitregion_end, value);
  return true;
}

/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.

   BITREGION_START is the bitpos of the first bitfield in this region.
   BITREGION_END is the bitpos of the ending bitfield in this region.
   These two fields are 0, if the C++ memory model does not apply,
   or we are not interested in keeping track of bitfield regions.

   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */
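/* For example, with "struct s { char a; int b : 3, c : 5; };" the fields
   B and C share one bit region; under the C++0x model a store to B may
   read and rewrite bits belonging to C, but must touch nothing outside
   that region (in particular, not A).  */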

void
store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                 unsigned HOST_WIDE_INT bitnum,
                 unsigned HOST_WIDE_INT bitregion_start,
                 unsigned HOST_WIDE_INT bitregion_end,
                 enum machine_mode fieldmode,
                 rtx value)
{
  /* Under the C++0x memory model, we must not touch bits outside the
     bit region.  Adjust the address to start at the beginning of the
     bit region.  */
  if (MEM_P (str_rtx)
      && bitregion_start > 0)
    {
      enum machine_mode bestmode;
      enum machine_mode op_mode;
      unsigned HOST_WIDE_INT offset;

      op_mode = mode_for_extraction (EP_insv, 3);
      if (op_mode == MAX_MACHINE_MODE)
        op_mode = VOIDmode;

      offset = bitregion_start / BITS_PER_UNIT;
      bitnum -= bitregion_start;
      bitregion_end -= bitregion_start;
      bitregion_start = 0;
      bestmode = get_best_mode (bitsize, bitnum,
                                bitregion_start, bitregion_end,
                                MEM_ALIGN (str_rtx),
                                op_mode,
                                MEM_VOLATILE_P (str_rtx));
      str_rtx = adjust_address (str_rtx, bestmode, offset);
    }

  if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
                          bitregion_start, bitregion_end,
                          fieldmode, value, true))
    gcc_unreachable ();
}

/* Use shifts and boolean operations to store VALUE
   into a bit field of width BITSIZE
   in a memory location specified by OP0 except offset by OFFSET bytes.
     (OFFSET must be 0 if OP0 is a register.)
   The field starts at position BITPOS within the byte.
    (If OP0 is a register, it may be a full word or a narrower mode,
     but BITPOS still counts within a full word,
     which is significant on bigendian machines.)  */
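/* Sketch of the technique, with BITPOS counted from the lsb: to store
   VALUE into a 3-bit field at BITPOS 4 of a byte, mask = 0x70 and the
   store computes op0 = (op0 & ~mask) | ((VALUE << 4) & mask).  */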

static void
store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
                       unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitpos,
                       unsigned HOST_WIDE_INT bitregion_start,
                       unsigned HOST_WIDE_INT bitregion_end,
                       rtx value)
{
  enum machine_mode mode;
  unsigned int total_bits = BITS_PER_WORD;
  rtx temp;
  int all_zero = 0;
  int all_one = 0;

  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    {
      gcc_assert (!offset);
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
        {
          store_split_bit_field (op0, bitsize, bitpos,
                                 bitregion_start, bitregion_end,
                                 value);
          return;
        }
    }
  else
    {
      unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;

      if (bitregion_end)
        maxbits = bitregion_end - bitregion_start + 1;

      /* Get the proper mode to use for this field.  We want a mode that
         includes the entire field.  If such a mode would be larger than
         a word, we won't be doing the extraction the normal way.
         We don't want a mode bigger than the destination.  */

      mode = GET_MODE (op0);
      if (GET_MODE_BITSIZE (mode) == 0
          || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
        mode = word_mode;

      if (MEM_VOLATILE_P (op0)
          && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
          && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
          && flag_strict_volatile_bitfields > 0)
        mode = GET_MODE (op0);
      else
        mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
                              bitregion_start, bitregion_end,
                              MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
        {
          /* The only way this should occur is if the field spans word
             boundaries.  */
          store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
                                 bitregion_start, bitregion_end, value);
          return;
        }

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
         be in the range 0 to total_bits-1, and put any excess bytes in
         OFFSET.  */
      if (bitpos >= total_bits)
        {
          offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
          bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
                     * BITS_PER_UNIT);
        }

      /* Get ref to an aligned byte, halfword, or word containing the field.
         Adjust BITPOS to be position within a word,
         and OFFSET to be the offset of that word.
         Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = adjust_address (op0, mode, offset);
    }

  mode = GET_MODE (op0);

  /* Now MODE is either some integral mode for a MEM as OP0,
     or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
     The bit field is contained entirely within OP0.
     BITPOS is the starting bit number within OP0.
     (OP0's mode may actually be narrower than MODE.)  */

  if (BYTES_BIG_ENDIAN)
      /* BITPOS is the distance between our msb
         and that of the containing datum.
         Convert it to the distance from the lsb.  */
      bitpos = total_bits - bitsize - bitpos;

  /* Now BITPOS is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (CONST_INT_P (value))
    {
      HOST_WIDE_INT v = INTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
        v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;

      if (v == 0)
        all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
                && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
               || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
        all_one = 1;

      value = lshift_value (mode, value, bitpos, bitsize);
    }
  else
    {
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
                      && bitpos + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
        value = convert_to_mode (mode, value, 1);

      if (must_and)
        value = expand_binop (mode, and_optab, value,
                              mask_rtx (mode, 0, bitsize, 0),
                              NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitpos > 0)
        value = expand_shift (LSHIFT_EXPR, mode, value,
                              bitpos, NULL_RTX, 1);
    }

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */

  temp = force_reg (mode, op0);

  if (! all_one)
    {
      temp = expand_binop (mode, and_optab, temp,
                           mask_rtx (mode, bitpos, bitsize, 1),
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    {
      temp = expand_binop (mode, ior_optab, temp, value,
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  if (op0 != temp)
    {
      op0 = copy_rtx (op0);
      emit_move_insn (op0, temp);
    }
}

/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store.

   This does not yet handle fields wider than BITS_PER_WORD.  */
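/* E.g. on a 32-bit target, a 10-bit field starting at bit 28 of a word
   pair is stored as two pieces: 4 bits into the first word and 6 bits
   into the second, each piece via store_fixed_bit_field.  */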

static void
store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitpos,
                       unsigned HOST_WIDE_INT bitregion_start,
                       unsigned HOST_WIDE_INT bitregion_end,
                       rtx value)
{
  unsigned int unit;
  unsigned int bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && !CONST_INT_P (value))
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
        value = word;
      else
        value = gen_lowpart_common (word_mode,
                                    force_reg (GET_MODE (value) != VOIDmode
                                               ? GET_MODE (value)
                                               : word_mode, value));
    }

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
         store_fixed_bit_field will call us again, and we will mutually
         recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (BYTES_BIG_ENDIAN)
        {
          int total_bits;

          /* We must do an endian conversion exactly the same way as it is
             done in extract_bit_field, so that the two calls to
             extract_fixed_bit_field will have comparable arguments.  */
          if (!MEM_P (value) || GET_MODE (value) == BLKmode)
            total_bits = BITS_PER_WORD;
          else
            total_bits = GET_MODE_BITSIZE (GET_MODE (value));

          /* Fetch successively less significant portions.  */
          if (CONST_INT_P (value))
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                             >> (bitsize - bitsdone - thissize))
                            & (((HOST_WIDE_INT) 1 << thissize) - 1));
          else
            /* The args are chosen so that the last part includes the
               lsb.  Give extract_bit_field the value it needs (with
               endianness compensation) to fetch the piece we want.  */
            part = extract_fixed_bit_field (word_mode, value, 0, thissize,
                                            total_bits - bitsize + bitsdone,
                                            NULL_RTX, 1, false);
        }
      else
        {
          /* Fetch successively more significant portions.  */
          if (CONST_INT_P (value))
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                             >> bitsdone)
                            & (((HOST_WIDE_INT) 1 << thissize) - 1));
          else
            part = extract_fixed_bit_field (word_mode, value, 0, thissize,
                                            bitsdone, NULL_RTX, 1, false);
        }

      /* If OP0 is a register, then handle OFFSET here.

         When handling multiword bitfields, extract_bit_field may pass
         down a word_mode SUBREG of a larger REG for a bitfield that actually
         crosses a word boundary.  Thus, for a SUBREG, we must find
         the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
        {
          int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
          enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
          if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
            word = word_offset ? const0_rtx : op0;
          else
            word = operand_subword_force (SUBREG_REG (op0), word_offset,
                                          GET_MODE (SUBREG_REG (op0)));
          offset = 0;
        }
      else if (REG_P (op0))
        {
          enum machine_mode op0_mode = GET_MODE (op0);
          if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
            word = offset ? const0_rtx : op0;
          else
            word = operand_subword_force (op0, offset, GET_MODE (op0));
          offset = 0;
        }
      else
        word = op0;

      /* OFFSET is in UNITs, and UNIT is in bits.
         store_fixed_bit_field wants offset in bytes.  If WORD is const0_rtx,
         it is just an out-of-bounds access.  Ignore it.  */
      if (word != const0_rtx)
        store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
                               thispos, bitregion_start, bitregion_end, part);
      bitsdone += thissize;
    }
}

/* A subroutine of extract_bit_field_1 that converts return value X
   to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
   to extract_bit_field.  */

static rtx
convert_extracted_bit_field (rtx x, enum machine_mode mode,
                             enum machine_mode tmode, bool unsignedp)
{
  if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
    return x;

  /* If the x mode is not a scalar integral, first convert to the
     integer mode of that size and then access it as a floating-point
     value via a SUBREG.  */
  if (!SCALAR_INT_MODE_P (tmode))
    {
      enum machine_mode smode;

      smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
      x = convert_to_mode (smode, x, unsignedp);
      x = force_reg (smode, x);
      return gen_lowpart (tmode, x);
    }

  return convert_to_mode (tmode, x, unsignedp);
}

/* A subroutine of extract_bit_field, with the same arguments.
   If FALLBACK_P is true, fall back to extract_fixed_bit_field
   if we can find no other means of implementing the operation.
   If FALLBACK_P is false, return NULL instead.  */

static rtx
extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                     unsigned HOST_WIDE_INT bitnum,
                     int unsignedp, bool packedp, rtx target,
                     enum machine_mode mode, enum machine_mode tmode,
                     bool fallback_p)
{
  unsigned int unit
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset, bitpos;
  rtx op0 = str_rtx;
  enum machine_mode int_mode;
  enum machine_mode ext_mode;
  enum machine_mode mode1;
  int byte_offset;

  if (tmode == VOIDmode)
    tmode = mode;

  while (GET_CODE (op0) == SUBREG)
    {
      bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* If we have an out-of-bounds access to a register, just return an
     uninitialized register of the required mode.  This can occur if the
     source code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return gen_reg_rtx (tmode);

  if (REG_P (op0)
      && mode == GET_MODE (op0)
      && bitnum == 0
      && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
    {
      /* We're trying to extract a full register from itself.  */
      return op0;
    }

  /* See if we can get a better vector mode before extracting.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && GET_MODE_INNER (GET_MODE (op0)) != tmode)
    {
      enum machine_mode new_mode;

      if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
        new_mode = MIN_MODE_VECTOR_FLOAT;
      else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
        new_mode = MIN_MODE_VECTOR_FRACT;
      else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
        new_mode = MIN_MODE_VECTOR_UFRACT;
      else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
        new_mode = MIN_MODE_VECTOR_ACCUM;
      else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
        new_mode = MIN_MODE_VECTOR_UACCUM;
      else
        new_mode = MIN_MODE_VECTOR_INT;

      for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
        if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
            && targetm.vector_mode_supported_p (new_mode))
          break;
      if (new_mode != VOIDmode)
        op0 = gen_lowpart (new_mode, op0);
    }

  /* Use vec_extract patterns for extracting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
      && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
          == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      struct expand_operand ops[3];
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      enum insn_code icode = optab_handler (vec_extract_optab, outermode);
      unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);

      create_output_operand (&ops[0], target, innermode);
      create_input_operand (&ops[1], op0, outermode);
      create_integer_operand (&ops[2], pos);
      if (maybe_expand_insn (icode, 3, ops))
        {
          target = ops[0].value;
          if (GET_MODE (target) != mode)
            return gen_lowpart (tmode, target);
          return target;
        }
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
        if (MEM_P (op0))
          op0 = adjust_address (op0, imode, 0);
        else if (imode != BLKmode)
          {
            op0 = gen_lowpart (imode, op0);

            /* If we got a SUBREG, force it into a register since we
               aren't going to be able to do another SUBREG on it.  */
            if (GET_CODE (op0) == SUBREG)
              op0 = force_reg (imode, op0);
          }
        else if (REG_P (op0))
          {
            rtx reg, subreg;
            imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
                                            MODE_INT);
            reg = gen_reg_rtx (imode);
            subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
            emit_move_insn (subreg, op0);
            op0 = reg;
            bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
          }
        else
          {
            rtx mem = assign_stack_temp (GET_MODE (op0),
                                         GET_MODE_SIZE (GET_MODE (op0)), 0);
            emit_move_insn (mem, op0);
            op0 = adjust_address (mem, BLKmode, 0);
          }
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  if (MEM_P (op0))
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* Extraction of a full-word or multi-word value from a structure
     in a register or aligned memory can be done with just a SUBREG.
     A subword value in the least significant part of a register
     can also be extracted with a SUBREG.  For this, we need the
     byte offset of the value in op0.  */

  bitpos = bitnum % unit;
  offset = bitnum / unit;
  byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && !MEM_P (op0)
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* ??? We currently assume TARGET is at least as big as BITSIZE.
     If that's wrong, the solution is to test for it and set TARGET to 0
     if needed.  */

  /* Only scalar integer modes can be converted via subregs.  There is an
     additional problem for FP modes here in that they can have a precision
     which is different from the size.  mode_for_size uses precision, but
     we want a mode based on the size, so we must avoid calling it for FP
     modes.  */
  mode1 = (SCALAR_INT_MODE_P (tmode)
           ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
           : mode);

  /* If the bitfield is volatile, we need to make sure the access
     remains on a type-aligned boundary.  */
  if (GET_CODE (op0) == MEM
      && MEM_VOLATILE_P (op0)
      && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
      && flag_strict_volatile_bitfields > 0)
    goto no_subreg_mode_swap;

  if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
        && bitpos % BITS_PER_WORD == 0)
       || (mode1 != BLKmode
           /* ??? The big endian test here is wrong.  This is correct
              if the value is in a register, and if mode_for_size is not
              the same mode as op0.  This causes us to get unnecessarily
              inefficient code from the Thumb port when -mbig-endian.  */
           && (BYTES_BIG_ENDIAN
               ? bitpos + bitsize == BITS_PER_WORD
               : bitpos == 0)))
      && ((!MEM_P (op0)
           && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))
           && GET_MODE_SIZE (mode1) != 0
           && byte_offset % GET_MODE_SIZE (mode1) == 0)
          || (MEM_P (op0)
              && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
                  || (offset * BITS_PER_UNIT % bitsize == 0
                      && MEM_ALIGN (op0) % bitsize == 0)))))
    {
      if (MEM_P (op0))
        op0 = adjust_address (op0, mode1, offset);
      else if (mode1 != GET_MODE (op0))
        {
          rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
                                         byte_offset);
          if (sub == NULL)
            goto no_subreg_mode_swap;
          op0 = sub;
        }
      if (mode1 != mode)
        return convert_to_mode (tmode, op0, unsignedp);
      return op0;
    }
 no_subreg_mode_swap:

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
         in the order least significant first.
         This is because the most significant word is the one which may
         be less than full.  */

      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;

      if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
        target = gen_reg_rtx (mode);

      /* Indicate for flow that the entire target reg is being set.  */
      emit_clobber (target);

      for (i = 0; i < nwords; i++)
        {
          /* If I is 0, use the low-order word in both field and target;
             if I is 1, use the next to lowest word; and so on.  */
          /* Word number in TARGET to use.  */
          unsigned int wordnum
            = (WORDS_BIG_ENDIAN
               ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1450
               : i);
1451
          /* Offset from start of field in OP0.  */
1452
          unsigned int bit_offset = (WORDS_BIG_ENDIAN
1453
                                     ? MAX (0, ((int) bitsize - ((int) i + 1)
1454
                                                * (int) BITS_PER_WORD))
1455
                                     : (int) i * BITS_PER_WORD);
1456
          rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1457
          rtx result_part
1458
            = extract_bit_field (op0, MIN (BITS_PER_WORD,
1459
                                           bitsize - i * BITS_PER_WORD),
1460
                                 bitnum + bit_offset, 1, false, target_part, mode,
1461
                                 word_mode);
1462
 
1463
          gcc_assert (target_part);
1464
 
1465
          if (result_part != target_part)
1466
            emit_move_insn (target_part, result_part);
1467
        }
1468
 
1469
      if (unsignedp)
1470
        {
1471
          /* Unless we've filled TARGET, the upper regs in a multi-reg value
1472
             need to be zero'd out.  */
1473
          if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1474
            {
1475
              unsigned int i, total_words;
1476
 
1477
              total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1478
              for (i = nwords; i < total_words; i++)
1479
                emit_move_insn
1480
                  (operand_subword (target,
1481
                                    WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1482
                                    1, VOIDmode),
1483
                   const0_rtx);
1484
            }
1485
          return target;
1486
        }
1487
 
1488
      /* Signed bit field: sign-extend with two arithmetic shifts.  */
1489
      target = expand_shift (LSHIFT_EXPR, mode, target,
1490
                             GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1491
      return expand_shift (RSHIFT_EXPR, mode, target,
1492
                           GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1493
    }
1494
 
1495
  /* From here on we know the desired field is smaller than a word.  */
1496
 
1497
  /* Check if there is a correspondingly-sized integer field, so we can
1498
     safely extract it as one size of integer, if necessary; then
1499
     truncate or extend to the size that is wanted; then use SUBREGs or
1500
     convert_to_mode to get one of the modes we really wanted.  */
1501
 
1502
  int_mode = int_mode_for_mode (tmode);
1503
  if (int_mode == BLKmode)
1504
    int_mode = int_mode_for_mode (mode);
1505
  /* Should probably push op0 out to memory and then do a load.  */
1506
  gcc_assert (int_mode != BLKmode);
1507
 
1508
  /* OFFSET is the number of words or bytes (UNIT says which)
1509
     from STR_RTX to the first word or byte containing part of the field.  */
1510
  if (!MEM_P (op0))
1511
    {
1512
      if (offset != 0
1513
          || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1514
        {
1515
          if (!REG_P (op0))
1516
            op0 = copy_to_reg (op0);
1517
          op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1518
                                op0, (offset * UNITS_PER_WORD));
1519
        }
1520
      offset = 0;
1521
    }
1522
 
1523
  /* Now OFFSET is nonzero only for memory operands.  */
1524
  ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1525
  if (ext_mode != MAX_MACHINE_MODE
1526
      && bitsize > 0
1527
      && GET_MODE_BITSIZE (ext_mode) >= bitsize
1528
      /* Do not use extv/extzv for volatile bitfields when
1529
         -fstrict-volatile-bitfields is in effect.  */
1530
      && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
1531
           && flag_strict_volatile_bitfields > 0)
1532
      /* If op0 is a register, we need it in EXT_MODE to make it
1533
         acceptable to the format of ext(z)v.  */
1534
      && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1535
      && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1536
           && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
1537
    {
1538
      struct expand_operand ops[4];
1539
      unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1540
      rtx xop0 = op0;
1541
      rtx xtarget = target;
1542
      rtx xspec_target = target;
1543
      rtx xspec_target_subreg = 0;
1544
 
1545
      /* If op0 is a register, we need it in EXT_MODE to make it
1546
         acceptable to the format of ext(z)v.  */
1547
      if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1548
        xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1549
      if (MEM_P (xop0))
1550
        /* Get ref to first byte containing part of the field.  */
1551
        xop0 = adjust_address (xop0, byte_mode, xoffset);
1552
 
1553
      /* Now convert from counting within UNIT to counting in EXT_MODE.  */
1554
      if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
1555
        xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1556
 
1557
      unit = GET_MODE_BITSIZE (ext_mode);
1558
 
1559
      /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1560
         "backwards" from the size of the unit we are extracting from.
1561
         Otherwise, we count bits from the most significant on a
1562
         BYTES/BITS_BIG_ENDIAN machine.  */
1563
 
1564
      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1565
        xbitpos = unit - bitsize - xbitpos;
1566
 
1567
      if (xtarget == 0)
1568
        xtarget = xspec_target = gen_reg_rtx (tmode);
1569
 
1570
      if (GET_MODE (xtarget) != ext_mode)
1571
        {
1572
          /* Don't use LHS paradoxical subreg if explicit truncation is needed
1573
             between the mode of the extraction (word_mode) and the target
1574
             mode.  Instead, create a temporary and use convert_move to set
1575
             the target.  */
1576
          if (REG_P (xtarget)
1577
              && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
1578
            {
1579
              xtarget = gen_lowpart (ext_mode, xtarget);
1580
              if (GET_MODE_PRECISION (ext_mode)
1581
                  > GET_MODE_PRECISION (GET_MODE (xspec_target)))
1582
                xspec_target_subreg = xtarget;
1583
            }
1584
          else
1585
            xtarget = gen_reg_rtx (ext_mode);
1586
        }
1587
 
1588
      create_output_operand (&ops[0], xtarget, ext_mode);
1589
      create_fixed_operand (&ops[1], xop0);
1590
      create_integer_operand (&ops[2], bitsize);
1591
      create_integer_operand (&ops[3], xbitpos);
1592
      if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
1593
                             4, ops))
1594
        {
1595
          xtarget = ops[0].value;
1596
          if (xtarget == xspec_target)
1597
            return xtarget;
1598
          if (xtarget == xspec_target_subreg)
1599
            return xspec_target;
1600
          return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1601
        }
1602
    }
1603
 
1604
  /* If OP0 is a memory, try copying it to a register and seeing if a
1605
     cheap register alternative is available.  */
1606
  if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1607
    {
1608
      enum machine_mode bestmode;
1609
 
1610
      /* Get the mode to use for inserting into this field.  If
1611
         OP0 is BLKmode, get the smallest mode consistent with the
1612
         alignment. If OP0 is a non-BLKmode object that is no
1613
         wider than EXT_MODE, use its mode. Otherwise, use the
1614
         smallest mode containing the field.  */
1615
 
1616
      if (GET_MODE (op0) == BLKmode
1617
          || (ext_mode != MAX_MACHINE_MODE
1618
              && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1619
        bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1620
                                  (ext_mode == MAX_MACHINE_MODE
1621
                                   ? VOIDmode : ext_mode),
1622
                                  MEM_VOLATILE_P (op0));
1623
      else
1624
        bestmode = GET_MODE (op0);
1625
 
1626
      if (bestmode != VOIDmode
1627
          && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1628
               && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1629
        {
1630
          unsigned HOST_WIDE_INT xoffset, xbitpos;
1631
 
1632
          /* Compute the offset as a multiple of this unit,
1633
             counting in bytes.  */
1634
          unit = GET_MODE_BITSIZE (bestmode);
1635
          xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1636
          xbitpos = bitnum % unit;
1637
 
1638
          /* Make sure the register is big enough for the whole field.  */
1639
          if (xoffset * BITS_PER_UNIT + unit
1640
              >= offset * BITS_PER_UNIT + bitsize)
1641
            {
1642
              rtx last, result, xop0;
1643
 
1644
              last = get_last_insn ();
1645
 
1646
              /* Fetch it to a register in that size.  */
1647
              xop0 = adjust_address (op0, bestmode, xoffset);
1648
              xop0 = force_reg (bestmode, xop0);
1649
              result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1650
                                            unsignedp, packedp, target,
1651
                                            mode, tmode, false);
1652
              if (result)
1653
                return result;
1654
 
1655
              delete_insns_since (last);
1656
            }
1657
        }
1658
    }
1659
 
1660
  if (!fallback_p)
1661
    return NULL;
1662
 
1663
  target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1664
                                    bitpos, target, unsignedp, packedp);
1665
  return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1666
}
1667
 
1668
/* Generate code to extract a byte-field from STR_RTX
1669
   containing BITSIZE bits, starting at BITNUM,
1670
   and put it in TARGET if possible (if TARGET is nonzero).
1671
   Regardless of TARGET, we return the rtx for where the value is placed.
1672
 
1673
   STR_RTX is the structure containing the byte (a REG or MEM).
1674
   UNSIGNEDP is nonzero if this is an unsigned bit field.
1675
   PACKEDP is nonzero if the field has the packed attribute.
1676
   MODE is the natural mode of the field value once extracted.
1677
   TMODE is the mode the caller would like the value to have;
1678
   but the value may be returned with type MODE instead.
1679
 
1680
   If a TARGET is specified and we can store in it at no extra cost,
1681
   we do so, and return TARGET.
1682
   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1683
   if they are equally easy.  */
1684
 
1685
rtx
1686
extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1687
                   unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1688
                   rtx target, enum machine_mode mode, enum machine_mode tmode)
1689
{
1690
  return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1691
                              target, mode, tmode, true);
1692
}
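
/* The routines above reduce every extraction to shifts, masks and moves.
   As a plain-C sketch of the semantics only (not part of the compiler's
   own code paths; assumes 8-bit bytes, little-endian bit numbering, and
   BITSIZE no wider than unsigned long), extracting an unsigned field of
   BITSIZE bits starting at bit BITNUM from a byte buffer looks like:  */

static unsigned long
sketch_extract_bit_field (const unsigned char *buf,
                          unsigned int bitnum, unsigned int bitsize)
{
  unsigned long result = 0;
  unsigned int i;

  /* Gather the field bit by bit, least significant bit first.  */
  for (i = 0; i < bitsize; i++)
    {
      unsigned int bit = bitnum + i;
      unsigned long b = (buf[bit / 8] >> (bit % 8)) & 1;
      result |= b << i;
    }
  return result;
}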

/* Extract a bit field using shifts and boolean operations.
   Returns an rtx to represent the value.
   OP0 addresses a register (word) or memory (byte).
   BITPOS says which bit within the word or byte the bit field starts in.
   OFFSET says how many bytes farther the bit field starts;
    it is 0 if OP0 is a register.
   BITSIZE says how many bits long the bit field is.
    (If OP0 is a register, it may be narrower than a full word,
     but BITPOS still counts within a full word,
     which is significant on bigendian machines.)

   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
   PACKEDP is true if the field has the packed attribute.

   If TARGET is nonzero, attempts to store the value there
   and return TARGET, but this is not guaranteed.
   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */

static rtx
extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
                         unsigned HOST_WIDE_INT offset,
                         unsigned HOST_WIDE_INT bitsize,
                         unsigned HOST_WIDE_INT bitpos, rtx target,
                         int unsignedp, bool packedp)
{
  unsigned int total_bits = BITS_PER_WORD;
  enum machine_mode mode;

  if (GET_CODE (op0) == SUBREG || REG_P (op0))
    {
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
        return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
    }
  else
    {
      /* Get the proper mode to use for this field.  We want a mode that
         includes the entire field.  If such a mode would be larger than
         a word, we won't be doing the extraction the normal way.  */

      if (MEM_VOLATILE_P (op0)
          && flag_strict_volatile_bitfields > 0)
        {
          if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
            mode = GET_MODE (op0);
          else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
            mode = GET_MODE (target);
          else
            mode = tmode;
        }
      else
        mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, 0, 0,
                              MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
        /* The only way this should occur is if the field spans word
           boundaries.  */
        return extract_split_bit_field (op0, bitsize,
                                        bitpos + offset * BITS_PER_UNIT,
                                        unsignedp);

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
         be in the range 0 to total_bits-1, and put any excess bytes in
         OFFSET.  */
      if (bitpos >= total_bits)
        {
          offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
          bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
                     * BITS_PER_UNIT);
        }

      /* If we're accessing a volatile MEM, we can't do the next
         alignment step if it results in a multi-word access where we
         otherwise wouldn't have one.  So, check for that case
         here.  */
      if (MEM_P (op0)
          && MEM_VOLATILE_P (op0)
          && flag_strict_volatile_bitfields > 0
          && bitpos + bitsize <= total_bits
          && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
        {
          if (STRICT_ALIGNMENT)
            {
              static bool informed_about_misalignment = false;
              bool warned;

              if (packedp)
                {
                  if (bitsize == total_bits)
                    warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
                                         "multiple accesses to volatile structure member"
                                         " because of packed attribute");
                  else
                    warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
                                         "multiple accesses to volatile structure bitfield"
                                         " because of packed attribute");

                  return extract_split_bit_field (op0, bitsize,
                                                  bitpos + offset * BITS_PER_UNIT,
                                                  unsignedp);
                }

              if (bitsize == total_bits)
                warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
                                     "mis-aligned access used for structure member");
              else
                warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
                                     "mis-aligned access used for structure bitfield");

              if (! informed_about_misalignment && warned)
                {
                  informed_about_misalignment = true;
                  inform (input_location,
                          "when a volatile object spans multiple type-sized locations,"
                          " the compiler must choose between using a single mis-aligned access to"
                          " preserve the volatility, or using multiple aligned accesses to avoid"
                          " runtime faults; this code may fail at runtime if the hardware does"
                          " not allow this access");
                }
            }
        }
      else
        {

          /* Get ref to an aligned byte, halfword, or word containing the field.
             Adjust BITPOS to be position within a word,
             and OFFSET to be the offset of that word.
             Then alter OP0 to refer to that word.  */
          bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
          offset -= (offset % (total_bits / BITS_PER_UNIT));
        }

      op0 = adjust_address (op0, mode, offset);
    }

  mode = GET_MODE (op0);

  if (BYTES_BIG_ENDIAN)
    /* BITPOS is the distance between our msb and that of OP0.
       Convert it to the distance from the lsb.  */
    bitpos = total_bits - bitsize - bitpos;

  /* Now BITPOS is always the distance between the field's lsb and that of OP0.
     We have reduced the big-endian case to the little-endian case.  */

  if (unsignedp)
    {
      if (bitpos)
        {
          /* If the field does not already start at the lsb,
             shift it so it does.  */
          /* Maybe propagate the target for the shift.  */
          /* But not if we will return it--could confuse integrate.c.  */
          rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
          if (tmode != mode) subtarget = 0;
          op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1);
        }
      /* Convert the value to the desired mode.  */
      if (mode != tmode)
        op0 = convert_to_mode (tmode, op0, 1);

      /* Unless the msb of the field used to be the msb when we shifted,
         mask out the upper bits.  */

      if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
        return expand_binop (GET_MODE (op0), and_optab, op0,
                             mask_rtx (GET_MODE (op0), 0, bitsize, 0),
                             target, 1, OPTAB_LIB_WIDEN);
      return op0;
    }

  /* To extract a signed bit-field, first shift its msb to the msb of the word,
     then arithmetic-shift its lsb to the lsb of the word.  */
  op0 = force_reg (mode, op0);

  /* Find the narrowest integer mode that contains the field.  */

  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
       mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
      {
        op0 = convert_to_mode (mode, op0, 0);
        break;
      }

  if (mode != tmode)
    target = 0;

  if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
    {
      int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
      /* Maybe propagate the target for the shift.  */
      rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
      op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
    }

  return expand_shift (RSHIFT_EXPR, mode, op0,
                       GET_MODE_BITSIZE (mode) - bitsize, target, 0);
}
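
/* A minimal sketch of the unsigned fallback above, for a 32-bit value
   that has already been loaded: shift the field down to bit 0, then mask
   off everything above it.  (Illustrative only; the function above
   additionally handles modes, targets and big-endian bit numbering.)  */

static unsigned int
sketch_extract_unsigned (unsigned int word, unsigned int bitpos,
                         unsigned int bitsize)
{
  /* BITSIZE low ones; guard BITSIZE == 32 to avoid an undefined shift.  */
  unsigned int mask = bitsize >= 32 ? ~0u : (1u << bitsize) - 1;
  return (word >> bitpos) & mask;
}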

/* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
   of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
   complement of that if COMPLEMENT.  The mask is truncated if
   necessary to the width of mode MODE.  The mask is zero-extended if
   BITSIZE+BITPOS is too small for MODE.  */

static rtx
mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
{
  double_int mask;

  mask = double_int_mask (bitsize);
  mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);

  if (complement)
    mask = double_int_not (mask);

  return immed_double_int_const (mask, mode);
}

/* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
   VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */

static rtx
lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
{
  double_int val;

  val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
  val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);

  return immed_double_int_const (val, mode);
}
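
/* In host arithmetic, mask_rtx and lshift_value above compute nothing more
   than the two expressions sketched below for a 64-bit value (the real
   routines use double_int so the result may exceed a host word; assume
   BITPOS + BITSIZE <= 64 here).  */

static unsigned long long
sketch_mask (unsigned int bitpos, unsigned int bitsize, int complement)
{
  /* BITSIZE ones followed by BITPOS zeros, optionally complemented.  */
  unsigned long long mask
    = (bitsize >= 64 ? ~0ull : (1ull << bitsize) - 1) << bitpos;
  return complement ? ~mask : mask;
}

static unsigned long long
sketch_lshift_value (unsigned long long value,
                     unsigned int bitpos, unsigned int bitsize)
{
  /* VALUE truncated to BITSIZE bits, then shifted left BITPOS bits.  */
  return (value & sketch_mask (0, bitsize, 0)) << bitpos;
}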

/* Extract a bit field that is split across two words
   and return an RTX for the result.

   OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
   BITSIZE is the field width; BITPOS, position of its first bit, in the word.
   UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */

static rtx
extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
                         unsigned HOST_WIDE_INT bitpos, int unsignedp)
{
  unsigned int unit;
  unsigned int bitsdone = 0;
  rtx result = NULL_RTX;
  int first = 1;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
         extract_fixed_bit_field will call us again, and we will mutually
         recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      /* If OP0 is a register, then handle OFFSET here.

         When handling multiword bitfields, extract_bit_field may pass
         down a word_mode SUBREG of a larger REG for a bitfield that actually
         crosses a word boundary.  Thus, for a SUBREG, we must find
         the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
        {
          int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
          word = operand_subword_force (SUBREG_REG (op0), word_offset,
                                        GET_MODE (SUBREG_REG (op0)));
          offset = 0;
        }
      else if (REG_P (op0))
        {
          word = operand_subword_force (op0, offset, GET_MODE (op0));
          offset = 0;
        }
      else
        word = op0;

      /* Extract the parts in bit-counting order,
         whose meaning is determined by BYTES_BIG_ENDIAN.
         OFFSET is in UNITs, and UNIT is in bits.
         extract_fixed_bit_field wants offset in bytes.  */
      part = extract_fixed_bit_field (word_mode, word,
                                      offset * unit / BITS_PER_UNIT,
                                      thissize, thispos, 0, 1, false);
      bitsdone += thissize;

      /* Shift this part into place for the result.  */
      if (BYTES_BIG_ENDIAN)
        {
          if (bitsize != bitsdone)
            part = expand_shift (LSHIFT_EXPR, word_mode, part,
                                 bitsize - bitsdone, 0, 1);
        }
      else
        {
          if (bitsdone != thissize)
            part = expand_shift (LSHIFT_EXPR, word_mode, part,
                                 bitsdone - thissize, 0, 1);
        }

      if (first)
        result = part;
      else
        /* Combine the parts with bitwise or.  This works
           because we extracted each part as an unsigned bit field.  */
        result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
                               OPTAB_LIB_WIDEN);

      first = 0;
    }

  /* Unsigned bit field: we are done.  */
  if (unsignedp)
    return result;
  /* Signed bit field: sign-extend with two arithmetic shifts.  */
  result = expand_shift (LSHIFT_EXPR, word_mode, result,
                         BITS_PER_WORD - bitsize, NULL_RTX, 0);
  return expand_shift (RSHIFT_EXPR, word_mode, result,
                       BITS_PER_WORD - bitsize, NULL_RTX, 0);
}
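
/* Two idioms used above, in plain C (a sketch only, assuming 32-bit
   words, BITPOS in 0..31, BITSIZE in 1..32, and the usual
   two's-complement arithmetic right shift): a field straddling a word
   boundary is fetched as two unsigned pieces OR'd together, and a
   signed result is produced by the shift-up/arithmetic-shift-down pair.  */

static int
sketch_extract_split_signed (unsigned int lo_word, unsigned int hi_word,
                             unsigned int bitpos, unsigned int bitsize)
{
  /* Low piece: the bits of LO_WORD from BITPOS upward.  */
  unsigned int lo_bits = 32 - bitpos;   /* bits available in LO_WORD */
  unsigned int result = lo_word >> bitpos;

  if (bitsize > lo_bits)
    /* High piece: the remaining bits, taken from bit 0 of HI_WORD.  */
    result |= hi_word << lo_bits;

  /* Sign-extend: shift left so the field's msb becomes bit 31, then
     arithmetic-shift right to drag copies of it back down.  */
  return ((int) (result << (32 - bitsize))) >> (32 - bitsize);
}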

/* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
   the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
   MODE, fill the upper bits with zeros.  Fail if the layout of either
   mode is unknown (as for CC modes) or if the extraction would involve
   unprofitable mode punning.  Return the value on success, otherwise
   return null.

   This is different from gen_lowpart* in these respects:

     - the returned value must always be considered an rvalue

     - when MODE is wider than SRC_MODE, the extraction involves
       a zero extension

     - when MODE is smaller than SRC_MODE, the extraction involves
       a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).

   In other words, this routine performs a computation, whereas the
   gen_lowpart* routines are conceptually lvalue or rvalue subreg
   operations.  */

rtx
extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
{
  enum machine_mode int_mode, src_int_mode;

  if (mode == src_mode)
    return src;

  if (CONSTANT_P (src))
    {
      /* simplify_gen_subreg can't be used here, as if simplify_subreg
         fails, it will happily create (subreg (symbol_ref)) or similar
         invalid SUBREGs.  */
      unsigned int byte = subreg_lowpart_offset (mode, src_mode);
      rtx ret = simplify_subreg (mode, src, src_mode, byte);
      if (ret)
        return ret;

      if (GET_MODE (src) == VOIDmode
          || !validate_subreg (mode, src_mode, src, byte))
        return NULL_RTX;

      src = force_reg (GET_MODE (src), src);
      return gen_rtx_SUBREG (mode, src, byte);
    }

  if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
    return NULL_RTX;

  if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
      && MODES_TIEABLE_P (mode, src_mode))
    {
      rtx x = gen_lowpart_common (mode, src);
      if (x)
        return x;
    }

  src_int_mode = int_mode_for_mode (src_mode);
  int_mode = int_mode_for_mode (mode);
  if (src_int_mode == BLKmode || int_mode == BLKmode)
    return NULL_RTX;

  if (!MODES_TIEABLE_P (src_int_mode, src_mode))
    return NULL_RTX;
  if (!MODES_TIEABLE_P (int_mode, mode))
    return NULL_RTX;

  src = gen_lowpart (src_int_mode, src);
  src = convert_modes (int_mode, src_int_mode, src, true);
  src = gen_lowpart (mode, src);
  return src;
}
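
/* What extract_low_bits does at the rtl level corresponds to ordinary
   bit-pattern reinterpretation at the C level.  A sketch, reading the
   bits of a float's representation through a union (assumes float and
   unsigned int are both 32 bits; the rtl routine additionally
   zero-extends or truncates when the sizes differ):  */

static unsigned int
sketch_low_bits_of_float (float f)
{
  union { float f; unsigned int u; } pun;
  pun.f = f;
  return pun.u;   /* the float's bit pattern, reinterpreted */
}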

/* Add INC into TARGET.  */

void
expand_inc (rtx target, rtx inc)
{
  rtx value = expand_binop (GET_MODE (target), add_optab,
                            target, inc,
                            target, 0, OPTAB_LIB_WIDEN);
  if (value != target)
    emit_move_insn (target, value);
}

/* Subtract DEC from TARGET.  */

void
expand_dec (rtx target, rtx dec)
{
  rtx value = expand_binop (GET_MODE (target), sub_optab,
                            target, dec,
                            target, 0, OPTAB_LIB_WIDEN);
  if (value != target)
    emit_move_insn (target, value);
}

/* Output a shift instruction for expression code CODE,
   with SHIFTED being the rtx for the value to shift,
   and AMOUNT the rtx for the amount to shift by.
   Store the result in the rtx TARGET, if that is convenient.
   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
   Return the rtx for where the value is.  */

static rtx
expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
                rtx amount, rtx target, int unsignedp)
{
  rtx op1, temp = 0;
  int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
  int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
  optab lshift_optab = ashl_optab;
  optab rshift_arith_optab = ashr_optab;
  optab rshift_uns_optab = lshr_optab;
  optab lrotate_optab = rotl_optab;
  optab rrotate_optab = rotr_optab;
  enum machine_mode op1_mode;
  int attempt;
  bool speed = optimize_insn_for_speed_p ();

  op1 = amount;
  op1_mode = GET_MODE (op1);

  /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
     shift amount is a vector, use the vector/vector shift patterns.  */
  if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
    {
      lshift_optab = vashl_optab;
      rshift_arith_optab = vashr_optab;
      rshift_uns_optab = vlshr_optab;
      lrotate_optab = vrotl_optab;
      rrotate_optab = vrotr_optab;
    }

  /* Previously detected shift-counts computed by NEGATE_EXPR
     and shifted in the other direction; but that does not work
     on all machines.  */

  if (SHIFT_COUNT_TRUNCATED)
    {
      if (CONST_INT_P (op1)
          && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
              (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
        op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
                       % GET_MODE_BITSIZE (mode));
      else if (GET_CODE (op1) == SUBREG
               && subreg_lowpart_p (op1)
               && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
        op1 = SUBREG_REG (op1);
    }

  if (op1 == const0_rtx)
    return shifted;

  /* Check whether it's cheaper to implement a left shift by a constant
     bit count by a sequence of additions.  */
  if (code == LSHIFT_EXPR
      && CONST_INT_P (op1)
      && INTVAL (op1) > 0
      && INTVAL (op1) < GET_MODE_PRECISION (mode)
      && INTVAL (op1) < MAX_BITS_PER_WORD
      && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
      && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
    {
      int i;
      for (i = 0; i < INTVAL (op1); i++)
        {
          temp = force_reg (mode, shifted);
          shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
                                  unsignedp, OPTAB_LIB_WIDEN);
        }
      return shifted;
    }
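
  /* For example, with INTVAL (op1) == 3 the loop above emits
     t = x + x;  t = t + t;  t = t + t;  computing x << 3 as three
     additions of a value to itself.  */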

  for (attempt = 0; temp == 0 && attempt < 3; attempt++)
    {
      enum optab_methods methods;

      if (attempt == 0)
        methods = OPTAB_DIRECT;
      else if (attempt == 1)
        methods = OPTAB_WIDEN;
      else
        methods = OPTAB_LIB_WIDEN;

      if (rotate)
        {
          /* Widening does not work for rotation.  */
          if (methods == OPTAB_WIDEN)
            continue;
          else if (methods == OPTAB_LIB_WIDEN)
            {
              /* If we have been unable to open-code this by a rotation,
                 do it as the IOR of two shifts.  I.e., to rotate A
                 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
                 where C is the bitsize of A.

                 It is theoretically possible that the target machine might
                 not be able to perform either shift and hence we would
                 be making two libcalls rather than just the one for the
                 shift (similarly if IOR could not be done).  We will allow
                 this extremely unlikely lossage to avoid complicating the
                 code below.  */

              rtx subtarget = target == shifted ? 0 : target;
              rtx new_amount, other_amount;
              rtx temp1;

              new_amount = op1;
              if (CONST_INT_P (op1))
                other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
                                        - INTVAL (op1));
              else
                other_amount
                  = simplify_gen_binary (MINUS, GET_MODE (op1),
                                         GEN_INT (GET_MODE_PRECISION (mode)),
                                         op1);

              shifted = force_reg (mode, shifted);

              temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
                                     mode, shifted, new_amount, 0, 1);
              temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
                                      mode, shifted, other_amount,
                                      subtarget, 1);
              return expand_binop (mode, ior_optab, temp, temp1, target,
                                   unsignedp, methods);
            }

          temp = expand_binop (mode,
                               left ? lrotate_optab : rrotate_optab,
                               shifted, op1, target, unsignedp, methods);
        }
      else if (unsignedp)
        temp = expand_binop (mode,
                             left ? lshift_optab : rshift_uns_optab,
                             shifted, op1, target, unsignedp, methods);

      /* Do arithmetic shifts.
         Also, if we are going to widen the operand, we can just as well
         use an arithmetic right-shift instead of a logical one.  */
      if (temp == 0 && ! rotate
          && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
        {
          enum optab_methods methods1 = methods;

          /* If trying to widen a log shift to an arithmetic shift,
             don't accept an arithmetic shift of the same size.  */
          if (unsignedp)
            methods1 = OPTAB_MUST_WIDEN;

          /* Arithmetic shift */

          temp = expand_binop (mode,
                               left ? lshift_optab : rshift_arith_optab,
                               shifted, op1, target, unsignedp, methods1);
        }

      /* We used to try extzv here for logical right shifts, but that was
         only useful for one machine, the VAX, and caused poor code
         generation there for lshrdi3, so the code was deleted and a
         define_expand for lshrsi3 was added to vax.md.  */
    }

  gcc_assert (temp);
  return temp;
}
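
/* The OPTAB_LIB_WIDEN rotate fallback above is the familiar two-shift
   idiom.  A sketch for 32-bit values (N must be in 1..31 here; the code
   above lets the target or a libcall handle the general case):  */

static unsigned int
sketch_rotate_left (unsigned int a, unsigned int n)
{
  /* (A << N) | ((unsigned) A >> (C - N)), with C == 32.  */
  return (a << n) | (a >> (32 - n));
}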

/* Output a shift instruction for expression code CODE,
   with SHIFTED being the rtx for the value to shift,
   and AMOUNT the amount to shift by.
   Store the result in the rtx TARGET, if that is convenient.
   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
   Return the rtx for where the value is.  */

rtx
expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
              int amount, rtx target, int unsignedp)
{
  return expand_shift_1 (code, mode,
                         shifted, GEN_INT (amount), target, unsignedp);
}

/* Output a shift instruction for expression code CODE,
   with SHIFTED being the rtx for the value to shift,
   and AMOUNT the tree for the amount to shift by.
   Store the result in the rtx TARGET, if that is convenient.
   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
   Return the rtx for where the value is.  */

rtx
expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
                       tree amount, rtx target, int unsignedp)
{
  return expand_shift_1 (code, mode,
                         shifted, expand_normal (amount), target, unsignedp);
}


/* Indicates the type of fixup needed after a constant multiplication.
   BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
   the result should be negated, and ADD_VARIANT means that the
   multiplicand should be added to the result.  */
enum mult_variant {basic_variant, negate_variant, add_variant};

static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
                        const struct mult_cost *, enum machine_mode mode);
static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
                                 struct algorithm *, enum mult_variant *, int);
static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
                              const struct algorithm *, enum mult_variant);
static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
                                                 int, rtx *, int *, int *);
static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
static rtx extract_high_half (enum machine_mode, rtx);
static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
                                       int, int);
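
/* synth_mult below searches for sequences like the following, in which a
   constant multiplication is decomposed into shifts, adds and subtracts.
   Hand-written sketches of the kind of code it synthesizes:  */

static unsigned int
sketch_mult_by_9 (unsigned int x)
{
  return (x << 3) + x;           /* 9 = 2**3 + 1  (alg_add_factor)  */
}

static unsigned int
sketch_mult_by_10 (unsigned int x)
{
  return ((x << 2) + x) << 1;    /* 10 = (2**2 + 1) * 2  (factor, then shift) */
}

static unsigned int
sketch_mult_by_7 (unsigned int x)
{
  return (x << 3) - x;           /* 7 = 2**3 - 1  (alg_sub_factor)  */
}
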
/* Compute and return the best algorithm for multiplying by T.
   The algorithm must cost less than COST_LIMIT.
   If retval.cost >= COST_LIMIT, no algorithm was found and all
   other fields of the returned struct are undefined.
   MODE is the machine mode of the multiplication.  */

static void
synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
            const struct mult_cost *cost_limit, enum machine_mode mode)
{
  int m;
  struct algorithm *alg_in, *best_alg;
  struct mult_cost best_cost;
  struct mult_cost new_limit;
  int op_cost, op_latency;
  unsigned HOST_WIDE_INT orig_t = t;
  unsigned HOST_WIDE_INT q;
  int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
  int hash_index;
  bool cache_hit = false;
  enum alg_code cache_alg = alg_zero;
  bool speed = optimize_insn_for_speed_p ();

  /* Indicate that no algorithm is yet found.  If no algorithm
     is found, this value will be returned and indicate failure.  */
  alg_out->cost.cost = cost_limit->cost + 1;
  alg_out->cost.latency = cost_limit->latency + 1;

  if (cost_limit->cost < 0
      || (cost_limit->cost == 0 && cost_limit->latency <= 0))
    return;

  /* Restrict the bits of "t" to the multiplication's mode.  */
  t &= GET_MODE_MASK (mode);

  /* t == 1 can be done in zero cost.  */
  if (t == 1)
    {
      alg_out->ops = 1;
      alg_out->cost.cost = 0;
      alg_out->cost.latency = 0;
      alg_out->op[0] = alg_m;
      return;
    }

  /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
     fail now.  */
  if (t == 0)
    {
      if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
        return;
      else
        {
          alg_out->ops = 1;
          alg_out->cost.cost = zero_cost[speed];
          alg_out->cost.latency = zero_cost[speed];
          alg_out->op[0] = alg_zero;
          return;
        }
    }

  /* We'll be needing a couple extra algorithm structures now.  */

  alg_in = XALLOCA (struct algorithm);
  best_alg = XALLOCA (struct algorithm);
  best_cost = *cost_limit;

  /* Compute the hash index.  */
  hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;

  /* See if we already know what to do for T.  */
  if (alg_hash[hash_index].t == t
      && alg_hash[hash_index].mode == mode
      && alg_hash[hash_index].speed == speed
      && alg_hash[hash_index].alg != alg_unknown)
    {
      cache_alg = alg_hash[hash_index].alg;

      if (cache_alg == alg_impossible)
        {
          /* The cache tells us that it's impossible to synthesize
             multiplication by T within alg_hash[hash_index].cost.  */
          if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
            /* COST_LIMIT is at least as restrictive as the one
               recorded in the hash table, in which case we have no
               hope of synthesizing a multiplication.  Just
               return.  */
            return;

          /* If we get here, COST_LIMIT is less restrictive than the
             one recorded in the hash table, so we may be able to
             synthesize a multiplication.  Proceed as if we didn't
             have the cache entry.  */
        }
      else
        {
          if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
            /* The cached algorithm shows that this multiplication
               requires more cost than COST_LIMIT.  Just return.  This
               way, we don't clobber this cache entry with
               alg_impossible but retain useful information.  */
            return;

          cache_hit = true;

          switch (cache_alg)
            {
            case alg_shift:
              goto do_alg_shift;

            case alg_add_t_m2:
            case alg_sub_t_m2:
              goto do_alg_addsub_t_m2;

            case alg_add_factor:
            case alg_sub_factor:
              goto do_alg_addsub_factor;

            case alg_add_t2_m:
              goto do_alg_add_t2_m;

            case alg_sub_t2_m:
              goto do_alg_sub_t2_m;

            default:
              gcc_unreachable ();
            }
        }
    }

  /* If we have a group of zero bits at the low-order part of T, try
     multiplying by the remaining bits and then doing a shift.  */

  if ((t & 1) == 0)
    {
    do_alg_shift:
      m = floor_log2 (t & -t);  /* m = number of low zero bits */
      if (m < maxm)
        {
          q = t >> m;
          /* The function expand_shift will choose between a shift and
             a sequence of additions, so the observed cost is given as
             MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]).  */
          op_cost = m * add_cost[speed][mode];
          if (shift_cost[speed][mode][m] < op_cost)
            op_cost = shift_cost[speed][mode][m];
          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_cost;
          synth_mult (alg_in, q, &new_limit, mode);

          alg_in->cost.cost += op_cost;
          alg_in->cost.latency += op_cost;
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              struct algorithm *x;
              best_cost = alg_in->cost;
              x = alg_in, alg_in = best_alg, best_alg = x;
              best_alg->log[best_alg->ops] = m;
              best_alg->op[best_alg->ops] = alg_shift;
            }

          /* See if treating ORIG_T as a signed number yields a better
             sequence.  Try this sequence only for a negative ORIG_T
             as it would be useless for a non-negative ORIG_T.  */
          if ((HOST_WIDE_INT) orig_t < 0)
            {
              /* Shift ORIG_T as follows because a right shift of a
                 negative-valued signed type is implementation
                 defined.  */
              q = ~(~orig_t >> m);
              /* The function expand_shift will choose between a shift
                 and a sequence of additions, so the observed cost is
                 given as MIN (m * add_cost[speed][mode],
                 shift_cost[speed][mode][m]).  */
              op_cost = m * add_cost[speed][mode];
              if (shift_cost[speed][mode][m] < op_cost)
                op_cost = shift_cost[speed][mode][m];
              new_limit.cost = best_cost.cost - op_cost;
              new_limit.latency = best_cost.latency - op_cost;
              synth_mult (alg_in, q, &new_limit, mode);

              alg_in->cost.cost += op_cost;
              alg_in->cost.latency += op_cost;
              if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
                {
                  struct algorithm *x;
                  best_cost = alg_in->cost;
                  x = alg_in, alg_in = best_alg, best_alg = x;
                  best_alg->log[best_alg->ops] = m;
                  best_alg->op[best_alg->ops] = alg_shift;
                }
            }
        }
      if (cache_hit)
        goto done;
    }

  /* If we have an odd number, add or subtract one.  */
  if ((t & 1) != 0)
    {
      unsigned HOST_WIDE_INT w;

    do_alg_addsub_t_m2:
      for (w = 1; (w & t) != 0; w <<= 1)
        ;
      /* If T was -1, then W will be zero after the loop.  This is another
         case where T ends with ...111.  Handling this with (T + 1) and
         subtract 1 produces slightly better code and results in algorithm
         selection much faster than treating it like the ...0111 case
         below.  */
      if (w == 0
          || (w > 2
              /* Reject the case where t is 3.
                 Thus we prefer addition in that case.  */
              && t != 3))
        {
          /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */

          op_cost = add_cost[speed][mode];
          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_cost;
          synth_mult (alg_in, t + 1, &new_limit, mode);

          alg_in->cost.cost += op_cost;
          alg_in->cost.latency += op_cost;
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              struct algorithm *x;
              best_cost = alg_in->cost;
              x = alg_in, alg_in = best_alg, best_alg = x;
              best_alg->log[best_alg->ops] = 0;
              best_alg->op[best_alg->ops] = alg_sub_t_m2;
            }
        }
      else
        {
          /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */

          op_cost = add_cost[speed][mode];
          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_cost;
          synth_mult (alg_in, t - 1, &new_limit, mode);

          alg_in->cost.cost += op_cost;
          alg_in->cost.latency += op_cost;
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              struct algorithm *x;
              best_cost = alg_in->cost;
              x = alg_in, alg_in = best_alg, best_alg = x;
              best_alg->log[best_alg->ops] = 0;
              best_alg->op[best_alg->ops] = alg_add_t_m2;
            }
        }

      /* We may be able to calculate a * -7, a * -15, a * -31, etc.
         quickly with a - a * n for some appropriate constant n.  */
      m = exact_log2 (-orig_t + 1);
      if (m >= 0 && m < maxm)
        {
          op_cost = shiftsub1_cost[speed][mode][m];
          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_cost;
          synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);

          alg_in->cost.cost += op_cost;
          alg_in->cost.latency += op_cost;
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              struct algorithm *x;
              best_cost = alg_in->cost;
              x = alg_in, alg_in = best_alg, best_alg = x;
              best_alg->log[best_alg->ops] = m;
              best_alg->op[best_alg->ops] = alg_sub_t_m2;
            }
        }

      if (cache_hit)
        goto done;
    }

  /* Look for factors of t of the form
     t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
     If we find such a factor, we can multiply by t using an algorithm that
     multiplies by q, shift the result by m and add/subtract it to itself.

     We search for large factors first and loop down, even if large factors
     are less probable than small; if we find a large factor we will find a
     good sequence quickly, and therefore be able to prune (by decreasing
     COST_LIMIT) the search.  */

 do_alg_addsub_factor:
  for (m = floor_log2 (t - 1); m >= 2; m--)
    {
      unsigned HOST_WIDE_INT d;

      d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
      if (t % d == 0 && t > d && m < maxm
          && (!cache_hit || cache_alg == alg_add_factor))
        {
          /* If the target has a cheap shift-and-add instruction use
             that in preference to a shift insn followed by an add insn.
             Assume that the shift-and-add is "atomic" with a latency
             equal to its cost, otherwise assume that on superscalar
             hardware the shift may be executed concurrently with the
             earlier steps in the algorithm.  */
          op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
          if (shiftadd_cost[speed][mode][m] < op_cost)
            {
              op_cost = shiftadd_cost[speed][mode][m];
              op_latency = op_cost;
            }
          else
            op_latency = add_cost[speed][mode];

          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_latency;
          synth_mult (alg_in, t / d, &new_limit, mode);

          alg_in->cost.cost += op_cost;
          alg_in->cost.latency += op_latency;
          if (alg_in->cost.latency < op_cost)
            alg_in->cost.latency = op_cost;
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              struct algorithm *x;
              best_cost = alg_in->cost;
              x = alg_in, alg_in = best_alg, best_alg = x;
              best_alg->log[best_alg->ops] = m;
              best_alg->op[best_alg->ops] = alg_add_factor;
            }
          /* Other factors will have been taken care of in the recursion.  */
          break;
        }

      d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
      if (t % d == 0 && t > d && m < maxm
          && (!cache_hit || cache_alg == alg_sub_factor))
        {
          /* If the target has a cheap shift-and-subtract insn use
             that in preference to a shift insn followed by a sub insn.
             Assume that the shift-and-sub is "atomic" with a latency
             equal to its cost, otherwise assume that on superscalar
             hardware the shift may be executed concurrently with the
             earlier steps in the algorithm.  */
          op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
          if (shiftsub0_cost[speed][mode][m] < op_cost)
            {
              op_cost = shiftsub0_cost[speed][mode][m];
              op_latency = op_cost;
            }
          else
            op_latency = add_cost[speed][mode];

          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_latency;
          synth_mult (alg_in, t / d, &new_limit, mode);

          alg_in->cost.cost += op_cost;
          alg_in->cost.latency += op_latency;
          if (alg_in->cost.latency < op_cost)
            alg_in->cost.latency = op_cost;
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              struct algorithm *x;
              best_cost = alg_in->cost;
              x = alg_in, alg_in = best_alg, best_alg = x;
              best_alg->log[best_alg->ops] = m;
              best_alg->op[best_alg->ops] = alg_sub_factor;
            }
          break;
        }
    }
  if (cache_hit)
    goto done;

  /* Try shift-and-add (load effective address) instructions,
     i.e. do a*3, a*5, a*9.  */
  if ((t & 1) != 0)
    {
    do_alg_add_t2_m:
      q = t - 1;
      q = q & -q;
      m = exact_log2 (q);
      if (m >= 0 && m < maxm)
        {
          op_cost = shiftadd_cost[speed][mode][m];
          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_cost;
          synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);

          alg_in->cost.cost += op_cost;
          alg_in->cost.latency += op_cost;
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              struct algorithm *x;
              best_cost = alg_in->cost;
              x = alg_in, alg_in = best_alg, best_alg = x;
              best_alg->log[best_alg->ops] = m;
              best_alg->op[best_alg->ops] = alg_add_t2_m;
            }
        }
      if (cache_hit)
        goto done;

    do_alg_sub_t2_m:
      q = t + 1;
      q = q & -q;
      m = exact_log2 (q);
      if (m >= 0 && m < maxm)
        {
          op_cost = shiftsub0_cost[speed][mode][m];
          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_cost;
          synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2769
 
2770
          alg_in->cost.cost += op_cost;
2771
          alg_in->cost.latency += op_cost;
2772
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2773
            {
2774
              struct algorithm *x;
2775
              best_cost = alg_in->cost;
2776
              x = alg_in, alg_in = best_alg, best_alg = x;
2777
              best_alg->log[best_alg->ops] = m;
2778
              best_alg->op[best_alg->ops] = alg_sub_t2_m;
2779
            }
2780
        }
2781
      if (cache_hit)
2782
        goto done;
2783
    }
2784
 
2785
 done:
2786
  /* If best_cost has not decreased, we have not found any algorithm.  */
2787
  if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2788
    {
2789
      /* We failed to find an algorithm.  Record alg_impossible for
2790
         this case (that is, <T, MODE, COST_LIMIT>) so that next time
2791
         we are asked to find an algorithm for T within the same or
2792
         lower COST_LIMIT, we can immediately return to the
2793
         caller.  */
2794
      alg_hash[hash_index].t = t;
2795
      alg_hash[hash_index].mode = mode;
2796
      alg_hash[hash_index].speed = speed;
2797
      alg_hash[hash_index].alg = alg_impossible;
2798
      alg_hash[hash_index].cost = *cost_limit;
2799
      return;
2800
    }
2801
 
2802
  /* Cache the result.  */
2803
  if (!cache_hit)
2804
    {
2805
      alg_hash[hash_index].t = t;
2806
      alg_hash[hash_index].mode = mode;
2807
      alg_hash[hash_index].speed = speed;
2808
      alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2809
      alg_hash[hash_index].cost.cost = best_cost.cost;
2810
      alg_hash[hash_index].cost.latency = best_cost.latency;
2811
    }
2812
 
2813
  /* If we are getting a too long sequence for `struct algorithm'
2814
     to record, make this search fail.  */
2815
  if (best_alg->ops == MAX_BITS_PER_WORD)
2816
    return;
2817
 
2818
  /* Copy the algorithm from temporary space to the space at alg_out.
2819
     We avoid using structure assignment because the majority of
2820
     best_alg is normally undefined, and this is a critical function.  */
2821
  alg_out->ops = best_alg->ops + 1;
2822
  alg_out->cost = best_cost;
2823
  memcpy (alg_out->op, best_alg->op,
2824
          alg_out->ops * sizeof *alg_out->op);
2825
  memcpy (alg_out->log, best_alg->log,
2826
          alg_out->ops * sizeof *alg_out->log);
2827
}
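
/* Illustrative sketch, not part of GCC (hypothetical helper, plain C):
   the kind of sequence synth_mult discovers.  Multiplying by 10 becomes
   two shift/add steps instead of a multiply instruction.  */

static unsigned int
demo_synth_mult_by_10 (unsigned int x)
{
  unsigned int t = (x << 2) + x;        /* alg_add_t2_m, log == 2: 5*x */
  return t << 1;                        /* alg_shift,    log == 1: 10*x */
}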

/* Find the cheapest way of multiplying a value of mode MODE by VAL.
   Try three variations:

       - a shift/add sequence based on VAL itself
       - a shift/add sequence based on -VAL, followed by a negation
       - a shift/add sequence based on VAL - 1, followed by an addition.

   Return true if the cheapest of these costs less than MULT_COST,
   describing the algorithm in *ALG and final fixup in *VARIANT.  */

static bool
choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
                     struct algorithm *alg, enum mult_variant *variant,
                     int mult_cost)
{
  struct algorithm alg2;
  struct mult_cost limit;
  int op_cost;
  bool speed = optimize_insn_for_speed_p ();

  /* Fail quickly for impossible bounds.  */
  if (mult_cost < 0)
    return false;

  /* Ensure that mult_cost provides a reasonable upper bound.
     Any constant multiplication can be performed with fewer
     than 2 * bits additions.  */
  op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
  if (mult_cost > op_cost)
    mult_cost = op_cost;

  *variant = basic_variant;
  limit.cost = mult_cost;
  limit.latency = mult_cost;
  synth_mult (alg, val, &limit, mode);

  /* This works only if the inverted value actually fits in an
     `unsigned int'.  */
  if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
    {
      op_cost = neg_cost[speed][mode];
      if (MULT_COST_LESS (&alg->cost, mult_cost))
        {
          limit.cost = alg->cost.cost - op_cost;
          limit.latency = alg->cost.latency - op_cost;
        }
      else
        {
          limit.cost = mult_cost - op_cost;
          limit.latency = mult_cost - op_cost;
        }

      synth_mult (&alg2, -val, &limit, mode);
      alg2.cost.cost += op_cost;
      alg2.cost.latency += op_cost;
      if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
        *alg = alg2, *variant = negate_variant;
    }

  /* This proves very useful for division-by-constant.  */
  op_cost = add_cost[speed][mode];
  if (MULT_COST_LESS (&alg->cost, mult_cost))
    {
      limit.cost = alg->cost.cost - op_cost;
      limit.latency = alg->cost.latency - op_cost;
    }
  else
    {
      limit.cost = mult_cost - op_cost;
      limit.latency = mult_cost - op_cost;
    }

  synth_mult (&alg2, val - 1, &limit, mode);
  alg2.cost.cost += op_cost;
  alg2.cost.latency += op_cost;
  if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
    *alg = alg2, *variant = add_variant;

  return MULT_COST_LESS (&alg->cost, mult_cost);
}
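
/* Illustrative sketch, not part of GCC (hypothetical helpers, plain C),
   of the variants tried above for VAL == 7: the basic variant works on
   VAL itself; the add variant synthesizes VAL - 1 == 6 and finishes
   with one extra addition of X.  */

static unsigned int
demo_mul7_basic (unsigned int x)
{
  return (x << 3) - x;                  /* final alg_sub_t_m2 step: 8*x - x */
}

static unsigned int
demo_mul7_add_variant (unsigned int x)
{
  return (((x << 1) + x) << 1) + x;     /* 6*x, then the add_variant fixup */
}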

/* A subroutine of expand_mult, used for constant multiplications.
   Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
   convenient.  Use the shift/add sequence described by ALG and apply
   the final fixup specified by VARIANT.  */

static rtx
expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
                   rtx target, const struct algorithm *alg,
                   enum mult_variant variant)
{
  HOST_WIDE_INT val_so_far;
  rtx insn, accum, tem;
  int opno;
  enum machine_mode nmode;

  /* Avoid referencing memory over and over and invalid sharing
     on SUBREGs.  */
  op0 = force_reg (mode, op0);

  /* ACCUM starts out either as OP0 or as a zero, depending on
     the first operation.  */

  if (alg->op[0] == alg_zero)
    {
      accum = copy_to_mode_reg (mode, const0_rtx);
      val_so_far = 0;
    }
  else if (alg->op[0] == alg_m)
    {
      accum = copy_to_mode_reg (mode, op0);
      val_so_far = 1;
    }
  else
    gcc_unreachable ();

  for (opno = 1; opno < alg->ops; opno++)
    {
      int log = alg->log[opno];
      rtx shift_subtarget = optimize ? 0 : accum;
      rtx add_target
        = (opno == alg->ops - 1 && target != 0 && variant != add_variant
           && !optimize)
          ? target : 0;
      rtx accum_target = optimize ? 0 : accum;
      rtx accum_inner;

      switch (alg->op[opno])
        {
        case alg_shift:
          tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
          /* REG_EQUAL note will be attached to the following insn.  */
          emit_move_insn (accum, tem);
          val_so_far <<= log;
          break;

        case alg_add_t_m2:
          tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
          accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
                                 add_target ? add_target : accum_target);
          val_so_far += (HOST_WIDE_INT) 1 << log;
          break;

        case alg_sub_t_m2:
          tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
          accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
                                 add_target ? add_target : accum_target);
          val_so_far -= (HOST_WIDE_INT) 1 << log;
          break;

        case alg_add_t2_m:
          accum = expand_shift (LSHIFT_EXPR, mode, accum,
                                log, shift_subtarget, 0);
          accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
                                 add_target ? add_target : accum_target);
          val_so_far = (val_so_far << log) + 1;
          break;

        case alg_sub_t2_m:
          accum = expand_shift (LSHIFT_EXPR, mode, accum,
                                log, shift_subtarget, 0);
          accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
                                 add_target ? add_target : accum_target);
          val_so_far = (val_so_far << log) - 1;
          break;

        case alg_add_factor:
          tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
          accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
                                 add_target ? add_target : accum_target);
          val_so_far += val_so_far << log;
          break;

        case alg_sub_factor:
          tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
          accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
                                 (add_target
                                  ? add_target : (optimize ? 0 : tem)));
          val_so_far = (val_so_far << log) - val_so_far;
          break;

        default:
          gcc_unreachable ();
        }

      /* Write a REG_EQUAL note on the last insn so that we can cse
         multiplication sequences.  Note that if ACCUM is a SUBREG,
         we've set the inner register and must properly indicate
         that.  */

      tem = op0, nmode = mode;
      accum_inner = accum;
      if (GET_CODE (accum) == SUBREG)
        {
          accum_inner = SUBREG_REG (accum);
          nmode = GET_MODE (accum_inner);
          tem = gen_lowpart (nmode, op0);
        }

      insn = get_last_insn ();
      set_dst_reg_note (insn, REG_EQUAL,
                        gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
                        accum_inner);
    }

  if (variant == negate_variant)
    {
      val_so_far = -val_so_far;
      accum = expand_unop (mode, neg_optab, accum, target, 0);
    }
  else if (variant == add_variant)
    {
      val_so_far = val_so_far + 1;
      accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
    }

  /* Compare only the bits of val and val_so_far that are significant
     in the result mode, to avoid sign-/zero-extension confusion.  */
  val &= GET_MODE_MASK (mode);
  val_so_far &= GET_MODE_MASK (mode);
  gcc_assert (val == val_so_far);

  return accum;
}

/* Perform a multiplication and return an rtx for the result.
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
   TARGET is a suggestion for where to store the result (an rtx).

   We check specially for a constant integer as OP1.
   If you want this check for OP0 as well, then before calling
   you should swap the two operands if OP0 would be constant.  */

rtx
expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
             int unsignedp)
{
  enum mult_variant variant;
  struct algorithm algorithm;
  int max_cost;
  bool speed = optimize_insn_for_speed_p ();

  /* Handling const0_rtx here allows us to use zero as a rogue value for
     coeff below.  */
  if (op1 == const0_rtx)
    return const0_rtx;
  if (op1 == const1_rtx)
    return op0;
  if (op1 == constm1_rtx)
    return expand_unop (mode,
                        GET_MODE_CLASS (mode) == MODE_INT
                        && !unsignedp && flag_trapv
                        ? negv_optab : neg_optab,
                        op0, target, 0);

  /* These are the operations that are potentially turned into a sequence
     of shifts and additions.  */
  if (SCALAR_INT_MODE_P (mode)
      && (unsignedp || !flag_trapv))
    {
      HOST_WIDE_INT coeff = 0;
      rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);

      /* synth_mult does an `unsigned int' multiply.  As long as the mode is
         less than or equal in size to `unsigned int' this doesn't matter.
         If the mode is larger than `unsigned int', then synth_mult works
         only if the constant value exactly fits in an `unsigned int' without
         any truncation.  This means that multiplying by negative values does
         not work; results are off by 2^32 on a 32 bit machine.  */

      if (CONST_INT_P (op1))
        {
          /* Attempt to handle multiplication of DImode values by negative
             coefficients, by performing the multiplication by a positive
             multiplier and then inverting the result.  */
          if (INTVAL (op1) < 0
              && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
            {
              /* It's safe to use -INTVAL (op1) even for INT_MIN, as the
                 result is interpreted as an unsigned coefficient.
                 Exclude cost of op0 from max_cost to match the cost
                 calculation of the synth_mult.  */
              max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
                                        speed)
                          - neg_cost[speed][mode]);
              if (max_cost > 0
                  && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
                                          &variant, max_cost))
                {
                  rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
                                                NULL_RTX, &algorithm,
                                                variant);
                  return expand_unop (mode, neg_optab, temp, target, 0);
                }
            }
          else coeff = INTVAL (op1);
        }
      else if (GET_CODE (op1) == CONST_DOUBLE)
        {
          /* If we are multiplying in DImode, it may still be a win
             to try to work with shifts and adds.  */
          if (CONST_DOUBLE_HIGH (op1) == 0
              && CONST_DOUBLE_LOW (op1) > 0)
            coeff = CONST_DOUBLE_LOW (op1);
          else if (CONST_DOUBLE_LOW (op1) == 0
                   && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
            {
              int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
                          + HOST_BITS_PER_WIDE_INT;
              return expand_shift (LSHIFT_EXPR, mode, op0,
                                   shift, target, unsignedp);
            }
        }

      /* We used to test optimize here, on the grounds that it's better to
         produce a smaller program when -O is not used.  But this causes
         such a terrible slowdown sometimes that it seems better to always
         use synth_mult.  */
      if (coeff != 0)
        {
          /* Special case powers of two.  */
          if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
            return expand_shift (LSHIFT_EXPR, mode, op0,
                                 floor_log2 (coeff), target, unsignedp);

          /* Exclude cost of op0 from max_cost to match the cost
             calculation of the synth_mult.  */
          max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
          if (choose_mult_variant (mode, coeff, &algorithm, &variant,
                                   max_cost))
            return expand_mult_const (mode, op0, coeff, target,
                                      &algorithm, variant);
        }
    }

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtx temp = op0;
      op0 = op1;
      op1 = temp;
    }

  /* Expand x*2.0 as x+x.  */
  if (GET_CODE (op1) == CONST_DOUBLE
      && SCALAR_FLOAT_MODE_P (mode))
    {
      REAL_VALUE_TYPE d;
      REAL_VALUE_FROM_CONST_DOUBLE (d, op1);

      if (REAL_VALUES_EQUAL (d, dconst2))
        {
          op0 = force_reg (GET_MODE (op0), op0);
          return expand_binop (mode, add_optab, op0, op0,
                               target, unsignedp, OPTAB_LIB_WIDEN);
        }
    }

  /* This used to use umul_optab if unsigned, but for non-widening multiply
     there is no difference between signed and unsigned.  */
  op0 = expand_binop (mode,
                      ! unsignedp
                      && flag_trapv && (GET_MODE_CLASS (mode) == MODE_INT)
                      ? smulv_optab : smul_optab,
                      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
  gcc_assert (op0);
  return op0;
}

/* Perform a widening multiplication and return an rtx for the result.
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
   TARGET is a suggestion for where to store the result (an rtx).
   THIS_OPTAB is the optab we should use; it must be either
   umul_widen_optab or smul_widen_optab.

   We check specially for a constant integer as OP1, comparing the
   cost of a widening multiply against the cost of a sequence of shifts
   and adds.  */

rtx
expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
                      int unsignedp, optab this_optab)
{
  bool speed = optimize_insn_for_speed_p ();
  rtx cop1;

  if (CONST_INT_P (op1)
      && GET_MODE (op0) != VOIDmode
      && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
                                this_optab == umul_widen_optab))
      && CONST_INT_P (cop1)
      && (INTVAL (cop1) >= 0
          || HWI_COMPUTABLE_MODE_P (mode)))
    {
      HOST_WIDE_INT coeff = INTVAL (cop1);
      int max_cost;
      enum mult_variant variant;
      struct algorithm algorithm;

      /* Special case powers of two.  */
      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
        {
          op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
          return expand_shift (LSHIFT_EXPR, mode, op0,
                               floor_log2 (coeff), target, unsignedp);
        }

      /* Exclude cost of op0 from max_cost to match the cost
         calculation of the synth_mult.  */
      max_cost = mul_widen_cost[speed][mode];
      if (choose_mult_variant (mode, coeff, &algorithm, &variant,
                               max_cost))
        {
          op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
          return expand_mult_const (mode, op0, coeff, target,
                                    &algorithm, variant);
        }
    }
  return expand_binop (mode, this_optab, op0, op1, target,
                       unsignedp, OPTAB_LIB_WIDEN);
}

/* Return the smallest n such that 2**n >= X.  */

int
ceil_log2 (unsigned HOST_WIDE_INT x)
{
  return floor_log2 (x - 1) + 1;
}
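
/* Illustrative stand-alone check of the identity above, not part of GCC
   (hypothetical helpers, plain C): ceil_log2 (x) == floor_log2 (x - 1) + 1,
   where floor_log2 (0) is -1 so that ceil_log2 (1) is 0.  */

static int
demo_floor_log2 (unsigned long long x)
{
  int n = -1;
  while (x != 0)
    x >>= 1, n++;
  return n;
}

static int
demo_ceil_log2 (unsigned long long x)
{
  return demo_floor_log2 (x - 1) + 1;   /* 1 -> 0, 8 -> 3, 9 -> 4 */
}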

/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
   replace division by D, and put the least significant N bits of the result
   in *MULTIPLIER_PTR and return the most significant bit.

   The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT); the
   needed precision is in PRECISION (should be <= N).

   PRECISION should be as small as possible so this function can choose
   the multiplier more freely.

   The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
   is to be used for a final right shift is placed in *POST_SHIFT_PTR.

   Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
   where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */

static
unsigned HOST_WIDE_INT
choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
                   rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
{
  HOST_WIDE_INT mhigh_hi, mlow_hi;
  unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
  int lgup, post_shift;
  int pow, pow2;
  unsigned HOST_WIDE_INT nl, dummy1;
  HOST_WIDE_INT nh, dummy2;

  /* lgup = ceil(log2(divisor)); */
  lgup = ceil_log2 (d);

  gcc_assert (lgup <= n);

  pow = n + lgup;
  pow2 = n + lgup - precision;

  /* We could handle this with some effort, but this case is much
     better handled directly with a scc insn, so rely on caller using
     that.  */
  gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);

  /* mlow = 2^(N + lgup)/d */
  if (pow >= HOST_BITS_PER_WIDE_INT)
    {
      nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
      nl = 0;
    }
  else
    {
      nh = 0;
      nl = (unsigned HOST_WIDE_INT) 1 << pow;
    }
  div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
                        &mlow_lo, &mlow_hi, &dummy1, &dummy2);

  /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
  if (pow2 >= HOST_BITS_PER_WIDE_INT)
    nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
  else
    nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
  div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
                        &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);

  gcc_assert (!mhigh_hi || nh - d < d);
  gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
  /* Assert that mlow < mhigh.  */
  gcc_assert (mlow_hi < mhigh_hi
              || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));

  /* If precision == N, then mlow, mhigh exceed 2^N
     (but they do not exceed 2^(N+1)).  */

  /* Reduce to lowest terms.  */
  for (post_shift = lgup; post_shift > 0; post_shift--)
    {
      unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
      unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
      if (ml_lo >= mh_lo)
        break;

      mlow_hi = 0;
      mlow_lo = ml_lo;
      mhigh_hi = 0;
      mhigh_lo = mh_lo;
    }

  *post_shift_ptr = post_shift;
  *lgup_ptr = lgup;
  if (n < HOST_BITS_PER_WIDE_INT)
    {
      unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
      *multiplier_ptr = GEN_INT (mhigh_lo & mask);
      return mhigh_lo >= mask;
    }
  else
    {
      *multiplier_ptr = GEN_INT (mhigh_lo);
      return mhigh_hi;
    }
}
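
/* Illustrative sketch, not part of GCC (hypothetical helper, plain C),
   of the reciprocal trick this function enables, shrunk to 16-bit
   operands so every intermediate fits in an unsigned long long (assumed
   to be at least 64 bits wide).  With m = ceil (2^(16 + lgup) / d) and a
   post-shift of lgup, x / d == (x * m) >> (16 + lgup) for all 16-bit x.  */

static void
demo_udiv_by_const (unsigned int d)     /* requires 1 <= d <= 0xffff */
{
  unsigned int lgup = 0;
  unsigned long long m;
  unsigned int x;

  while ((1ULL << lgup) < d)
    lgup++;                             /* lgup = ceil (log2 (d)) */
  m = ((1ULL << (16 + lgup)) + d - 1) / d;
  for (x = 0; x <= 0xffff; x++)
    if ((x * m) >> (16 + lgup) != x / d)
      __builtin_trap ();                /* never reached for valid D */
}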

/* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
   congruent to 1 (mod 2**N).  */

static unsigned HOST_WIDE_INT
invert_mod2n (unsigned HOST_WIDE_INT x, int n)
{
  /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */

  /* The algorithm notes that the choice y = x satisfies
     x*y == 1 mod 2^3, since x is assumed odd.
     Each iteration doubles the number of bits of significance in y.  */

  unsigned HOST_WIDE_INT mask;
  unsigned HOST_WIDE_INT y = x;
  int nbit = 3;

  mask = (n == HOST_BITS_PER_WIDE_INT
          ? ~(unsigned HOST_WIDE_INT) 0
          : ((unsigned HOST_WIDE_INT) 1 << n) - 1);

  while (nbit < n)
    {
      y = y * (2 - x*y) & mask;         /* Modulo 2^N */
      nbit *= 2;
    }
  return y;
}
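
/* Illustrative sketch, not part of GCC (hypothetical helper, plain C):
   the same Newton-style iteration at a fixed width (assumes a 64-bit
   unsigned long long, so the masking happens by ordinary wraparound).
   This inverse is what makes exact division by an odd constant D a
   single multiply: when X is a multiple of D, X / D == X * demo_invert (D)
   in unsigned arithmetic.  */

static unsigned long long
demo_invert (unsigned long long x)      /* x must be odd */
{
  unsigned long long y = x;             /* x*x == 1 (mod 8): 3 bits right */
  int nbit;

  for (nbit = 3; nbit < 64; nbit *= 2)
    y = y * (2 - x * y);                /* each step doubles the good bits */
  return y;                             /* x * y == 1 (mod 2^64) */
}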

/* Emit code to adjust ADJ_OPERAND after a multiplication of the wrong
   signedness flavor of OP0 and OP1.  ADJ_OPERAND is already the high half
   of the product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed
   product to become unsigned; if UNSIGNEDP is zero, adjust the unsigned
   product to become signed.

   The result is put in TARGET if that is convenient.

   MODE is the mode of operation.  */

rtx
expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
                             rtx op1, rtx target, int unsignedp)
{
  rtx tem;
  enum rtx_code adj_code = unsignedp ? PLUS : MINUS;

  tem = expand_shift (RSHIFT_EXPR, mode, op0,
                      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
  tem = expand_and (mode, tem, op1, NULL_RTX);
  adj_operand
    = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
                     adj_operand);

  tem = expand_shift (RSHIFT_EXPR, mode, op1,
                      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
  tem = expand_and (mode, tem, op0, NULL_RTX);
  target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
                          target);

  return target;
}
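
/* Illustrative sketch, not part of GCC (hypothetical helper, plain C),
   of the identity implemented above, for a 32-bit mode (assumes 32-bit
   int and an arithmetic right shift for signed values): the signed high
   product equals the unsigned one minus B where A's sign bit is set and
   minus A where B's sign bit is set, all mod 2^32.  */

static unsigned int
demo_smulh_from_umulh (unsigned int a, unsigned int b)
{
  unsigned int uh = (unsigned int) (((unsigned long long) a * b) >> 32);

  uh -= ((int) a >> 31) & b;            /* subtract b when a is negative */
  uh -= ((int) b >> 31) & a;            /* subtract a when b is negative */
  return uh;                            /* high half of the signed product */
}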

/* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */

static rtx
extract_high_half (enum machine_mode mode, rtx op)
{
  enum machine_mode wider_mode;

  if (mode == word_mode)
    return gen_highpart (mode, op);

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));

  wider_mode = GET_MODE_WIDER_MODE (mode);
  op = expand_shift (RSHIFT_EXPR, wider_mode, op,
                     GET_MODE_BITSIZE (mode), 0, 1);
  return convert_modes (mode, wider_mode, op, 0);
}
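
/* Illustrative sketch, not part of GCC (hypothetical helper, plain C),
   of the wider-mode path above for a 32-bit mode widened to 64 bits:
   multiply in the wider mode, shift right by the narrow width, then
   truncate back.  */

static unsigned int
demo_umul_highpart (unsigned int a, unsigned int b)
{
  unsigned long long wide = (unsigned long long) a * b; /* widening mult */
  return (unsigned int) (wide >> 32);   /* extract_high_half equivalent */
}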

/* Like expand_mult_highpart, but only consider using a multiplication
   optab.  OP1 is an rtx for the constant operand.  */

static rtx
expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
                            rtx target, int unsignedp, int max_cost)
{
  rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
  enum machine_mode wider_mode;
  optab moptab;
  rtx tem;
  int size;
  bool speed = optimize_insn_for_speed_p ();

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));

  wider_mode = GET_MODE_WIDER_MODE (mode);
  size = GET_MODE_BITSIZE (mode);

  /* Firstly, try using a multiplication insn that only generates the needed
     high part of the product, and in the sign flavor of unsignedp.  */
  if (mul_highpart_cost[speed][mode] < max_cost)
    {
      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
                          unsignedp, OPTAB_DIRECT);
      if (tem)
        return tem;
    }

  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
     Need to adjust the result after the multiplication.  */
  if (size - 1 < BITS_PER_WORD
      && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
          + 4 * add_cost[speed][mode] < max_cost))
    {
      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
                          unsignedp, OPTAB_DIRECT);
      if (tem)
        /* We used the wrong signedness.  Adjust the result.  */
        return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
                                            tem, unsignedp);
    }

  /* Try widening multiplication.  */
  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && mul_widen_cost[speed][wider_mode] < max_cost)
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
                          unsignedp, OPTAB_WIDEN);
      if (tem)
        return extract_high_half (mode, tem);
    }

  /* Try widening the mode and performing a non-widening multiplication.  */
  if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
    {
      rtx insns, wop0, wop1;

      /* We need to widen the operands, for example to ensure the
         constant multiplier is correctly sign or zero extended.
         Use a sequence to clean up any instructions emitted by
         the conversions if things don't work out.  */
      start_sequence ();
      wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
      wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
      tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
                          unsignedp, OPTAB_WIDEN);
      insns = get_insns ();
      end_sequence ();

      if (tem)
        {
          emit_insn (insns);
          return extract_high_half (mode, tem);
        }
    }

  /* Try widening multiplication of opposite signedness, and adjust.  */
  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
          + 4 * add_cost[speed][mode] < max_cost))
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
                          NULL_RTX, ! unsignedp, OPTAB_WIDEN);
      if (tem != 0)
        {
          tem = extract_high_half (mode, tem);
          /* We used the wrong signedness.  Adjust the result.  */
          return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
                                              target, unsignedp);
        }
    }

  return 0;
}

/* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
   putting the high half of the result in TARGET if that is convenient,
   and return where the result is.  If the operation cannot be performed,
   0 is returned.

   MODE is the mode of operation and result.

   UNSIGNEDP nonzero means unsigned multiply.

   MAX_COST is the total allowed cost for the expanded RTL.  */

static rtx
expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
                      rtx target, int unsignedp, int max_cost)
{
  enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
  unsigned HOST_WIDE_INT cnst1;
  int extra_cost;
  bool sign_adjust = false;
  enum mult_variant variant;
  struct algorithm alg;
  rtx tem;
  bool speed = optimize_insn_for_speed_p ();

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
  /* We can't support modes wider than HOST_BITS_PER_INT.  */
  gcc_assert (HWI_COMPUTABLE_MODE_P (mode));

  cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);

  /* We can't optimize modes wider than BITS_PER_WORD.
     ??? We might be able to perform double-word arithmetic if
     mode == word_mode, however all the cost calculations in
     synth_mult etc. assume single-word operations.  */
  if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
    return expand_mult_highpart_optab (mode, op0, op1, target,
                                       unsignedp, max_cost);

  extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];

  /* Check whether we try to multiply by a negative constant.  */
  if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
    {
      sign_adjust = true;
      extra_cost += add_cost[speed][mode];
    }

  /* See whether shift/add multiplication is cheap enough.  */
  if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
                           max_cost - extra_cost))
    {
      /* See whether the specialized multiplication optabs are
         cheaper than the shift/add version.  */
      tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
                                        alg.cost.cost + extra_cost);
      if (tem)
        return tem;

      tem = convert_to_mode (wider_mode, op0, unsignedp);
      tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
      tem = extract_high_half (mode, tem);

      /* Adjust result for signedness.  */
      if (sign_adjust)
        tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);

      return tem;
    }
  return expand_mult_highpart_optab (mode, op0, op1, target,
                                     unsignedp, max_cost);
}


/* Expand signed modulus of OP0 by a power of two D in mode MODE.  */

static rtx
expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
{
  unsigned HOST_WIDE_INT masklow, maskhigh;
  rtx result, temp, shift, label;
  int logd;

  logd = floor_log2 (d);
  result = gen_reg_rtx (mode);

  /* Avoid conditional branches when they're expensive.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
      && optimize_insn_for_speed_p ())
    {
      rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
                                      mode, 0, -1);
      if (signmask)
        {
          signmask = force_reg (mode, signmask);
          masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
          shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);

          /* Use the rtx_cost of a LSHIFTRT instruction to determine
             which instruction sequence to use.  If logical right shifts
             are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
             use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */

          temp = gen_rtx_LSHIFTRT (mode, result, shift);
          if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
              || (set_src_cost (temp, optimize_insn_for_speed_p ())
                  > COSTS_N_INSNS (2)))
            {
              temp = expand_binop (mode, xor_optab, op0, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, sub_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, xor_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, sub_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
            }
          else
            {
              signmask = expand_binop (mode, lshr_optab, signmask, shift,
                                       NULL_RTX, 1, OPTAB_LIB_WIDEN);
              signmask = force_reg (mode, signmask);

              temp = expand_binop (mode, add_optab, op0, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, sub_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
            }
          return temp;
        }
    }

  /* Mask contains the mode's signbit and the significant bits of the
     modulus.  By including the signbit in the operation, many targets
     can avoid an explicit compare operation in the following comparison
     against zero.  */

  masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
  if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
    {
      masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
      maskhigh = -1;
    }
  else
    maskhigh = (HOST_WIDE_INT) -1
                 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);

  temp = expand_binop (mode, and_optab, op0,
                       immed_double_const (masklow, maskhigh, mode),
                       result, 1, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);

  label = gen_label_rtx ();
  do_cmp_and_jump (result, const0_rtx, GE, mode, label);

  temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
                       0, OPTAB_LIB_WIDEN);
  masklow = (HOST_WIDE_INT) -1 << logd;
  maskhigh = -1;
  temp = expand_binop (mode, ior_optab, temp,
                       immed_double_const (masklow, maskhigh, mode),
                       result, 1, OPTAB_LIB_WIDEN);
  temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
                       0, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);
  emit_label (label);
  return result;
}
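
/* Illustrative sketch, not part of GCC (hypothetical helper, plain C),
   of the branch-free XOR/SUB/AND/XOR/SUB sequence emitted above, for
   32-bit values (assumes arithmetic right shift on signed int and
   x != INT_MIN).  */

static int
demo_smod_pow2 (int x, int logd)
{
  int s = x >> 31;                      /* 0, or -1 when x is negative */
  int mask = (1 << logd) - 1;

  return ((((x ^ s) - s) & mask) ^ s) - s;      /* |x| & mask, re-signed */
}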

/* Expand signed division of OP0 by a power of two D in mode MODE.
   This routine is only called for positive values of D.  */

static rtx
expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx temp, label;
  int logd;

  logd = floor_log2 (d);

  if (d == 2
      && BRANCH_COST (optimize_insn_for_speed_p (),
                      false) >= 1)
    {
      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
                           0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

#ifdef HAVE_conditional_move
  if (BRANCH_COST (optimize_insn_for_speed_p (), false)
      >= 2)
    {
      rtx temp2;

      /* ??? emit_conditional_move forces a stack adjustment via
         compare_from_rtx so, if the sequence is discarded, it will
         be lost.  Do it now instead.  */
      do_pending_stack_adjust ();

      start_sequence ();
      temp2 = copy_to_mode_reg (mode, op0);
      temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
                           NULL_RTX, 0, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);

      /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
      temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
                                     mode, temp, temp2, mode, 0);
      if (temp2)
        {
          rtx seq = get_insns ();
          end_sequence ();
          emit_insn (seq);
          return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
        }
      end_sequence ();
    }
#endif

  if (BRANCH_COST (optimize_insn_for_speed_p (),
                   false) >= 2)
    {
      int ushift = GET_MODE_BITSIZE (mode) - logd;

      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
      if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
        temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
                             NULL_RTX, 0, OPTAB_LIB_WIDEN);
      else
        temp = expand_shift (RSHIFT_EXPR, mode, temp,
                             ushift, NULL_RTX, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
                           0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

  label = gen_label_rtx ();
  temp = copy_to_mode_reg (mode, op0);
  do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
  expand_inc (temp, GEN_INT (d - 1));
  emit_label (label);
  return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
}
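
/* Illustrative sketch, not part of GCC (hypothetical helper, plain C),
   of the branch-free path above: bias a negative dividend by d - 1 so
   the arithmetic shift rounds toward zero like C division (assumes
   32-bit int with arithmetic right shift).  */

static int
demo_sdiv_pow2 (int x, int logd)
{
  int bias = (x >> 31) & ((1 << logd) - 1);

  return (x + bias) >> logd;            /* == x / (1 << logd) */
}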

/* Emit the code to divide OP0 by OP1, putting the result in TARGET
   if that is convenient, and returning where the result is.
   You may request either the quotient or the remainder as the result;
   specify REM_FLAG nonzero to get the remainder.

   CODE is the expression code for which kind of division this is;
   it controls how rounding is done.  MODE is the machine mode to use.
   UNSIGNEDP nonzero means do unsigned division.  */

/* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
   and then correct it by or'ing in missing high bits
   if result of ANDI is nonzero.
   For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
   This could optimize to a bfexts instruction.
   But C doesn't use these operations, so their optimizations are
   left for later.  */
/* ??? For modulo, we don't actually need the highpart of the first product;
   the low part will do nicely.  And for small divisors, the second multiply
   can also be a low-part only multiply or even be completely left out.
   E.g. to calculate the remainder of a division by 3 with a 32 bit
   multiply, multiply with 0x55555556 and extract the upper two bits;
   the result is exact for inputs up to 0x1fffffff.
   The input range can be reduced by using cross-sum rules.
   For odd divisors >= 3, the following table gives right shift counts
   so that if a number is shifted by an integer multiple of the given
   amount, the remainder stays the same:
   2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
   14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
   0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
   20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
   0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12

   Cross-sum rules for even numbers can be derived by leaving as many bits
   to the right alone as the divisor has zeros to the right.
   E.g. if x is an unsigned 32 bit number:
   (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
   */
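
/* Illustrative check, not part of GCC (hypothetical helper, plain C), of
   the claim above (assumes a 32-bit unsigned int): x mod 3 read from the
   top two bits of a 32-bit multiply by 0x55555556, exact for inputs up
   to 0x1fffffff.  */

static unsigned int
demo_rem3 (unsigned int x)
{
  return (x * 0x55555556u) >> 30;       /* product wraps mod 2^32 */
}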
3831
 
3832
rtx
3833
expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3834
               rtx op0, rtx op1, rtx target, int unsignedp)
3835
{
3836
  enum machine_mode compute_mode;
3837
  rtx tquotient;
3838
  rtx quotient = 0, remainder = 0;
3839
  rtx last;
3840
  int size;
3841
  rtx insn;
3842
  optab optab1, optab2;
3843
  int op1_is_constant, op1_is_pow2 = 0;
3844
  int max_cost, extra_cost;
3845
  static HOST_WIDE_INT last_div_const = 0;
3846
  static HOST_WIDE_INT ext_op1;
3847
  bool speed = optimize_insn_for_speed_p ();
3848
 
3849
  op1_is_constant = CONST_INT_P (op1);
3850
  if (op1_is_constant)
3851
    {
3852
      ext_op1 = INTVAL (op1);
3853
      if (unsignedp)
3854
        ext_op1 &= GET_MODE_MASK (mode);
3855
      op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3856
                     || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3857
    }
3858
 
3859
  /*
3860
     This is the structure of expand_divmod:
3861
 
3862
     First comes code to fix up the operands so we can perform the operations
3863
     correctly and efficiently.
3864
 
3865
     Second comes a switch statement with code specific for each rounding mode.
3866
     For some special operands this code emits all RTL for the desired
3867
     operation, for other cases, it generates only a quotient and stores it in
3868
     QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3869
     to indicate that it has not done anything.
3870
 
3871
     Last comes code that finishes the operation.  If QUOTIENT is set and
3872
     REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3873
     QUOTIENT is not set, it is computed using trunc rounding.
3874
 
3875
     We try to generate special code for division and remainder when OP1 is a
3876
     constant.  If |OP1| = 2**n we can use shifts and some other fast
3877
     operations.  For other values of OP1, we compute a carefully selected
3878
     fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3879
     by m.
3880
 
3881
     In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3882
     half of the product.  Different strategies for generating the product are
3883
     implemented in expand_mult_highpart.
3884
 
3885
     If what we actually want is the remainder, we generate that by another
3886
     by-constant multiplication and a subtraction.  */
3887
 
3888
  /* We shouldn't be called with OP1 == const1_rtx, but some of the
3889
     code below will malfunction if we are, so check here and handle
3890
     the special case if so.  */
3891
  if (op1 == const1_rtx)
3892
    return rem_flag ? const0_rtx : op0;
3893
 
3894
    /* When dividing by -1, we could get an overflow.
3895
     negv_optab can handle overflows.  */
3896
  if (! unsignedp && op1 == constm1_rtx)
3897
    {
3898
      if (rem_flag)
3899
        return const0_rtx;
3900
      return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3901
                          ? negv_optab : neg_optab, op0, target, 0);
3902
    }
3903
 
3904
  if (target
3905
      /* Don't use the function value register as a target
3906
         since we have to read it as well as write it,
3907
         and function-inlining gets confused by this.  */
3908
      && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3909
          /* Don't clobber an operand while doing a multi-step calculation.  */
3910
          || ((rem_flag || op1_is_constant)
3911
              && (reg_mentioned_p (target, op0)
3912
                  || (MEM_P (op0) && MEM_P (target))))
3913
          || reg_mentioned_p (target, op1)
3914
          || (MEM_P (op1) && MEM_P (target))))
3915
    target = 0;
3916
 
3917
  /* Get the mode in which to perform this computation.  Normally it will
3918
     be MODE, but sometimes we can't do the desired operation in MODE.
3919
     If so, pick a wider mode in which we can do the operation.  Convert
3920
     to that mode at the start to avoid repeated conversions.
3921
 
3922
     First see what operations we need.  These depend on the expression
3923
     we are evaluating.  (We assume that divxx3 insns exist under the
3924
     same conditions that modxx3 insns and that these insns don't normally
3925
     fail.  If these assumptions are not correct, we may generate less
3926
     efficient code in some cases.)
3927
 
3928
     Then see if we find a mode in which we can open-code that operation
3929
     (either a division, modulus, or shift).  Finally, check for the smallest
3930
     mode for which we can do the operation with a library call.  */
3931
 
3932
  /* We might want to refine this now that we have division-by-constant
3933
     optimization.  Since expand_mult_highpart tries so many variants, it is
3934
     not straightforward to generalize this.  Maybe we should make an array
3935
     of possible modes in init_expmed?  Save this for GCC 2.7.  */
3936
 
3937
  optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3938
            ? (unsignedp ? lshr_optab : ashr_optab)
3939
            : (unsignedp ? udiv_optab : sdiv_optab));
3940
  optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3941
            ? optab1
3942
            : (unsignedp ? udivmod_optab : sdivmod_optab));
3943
 
3944
  for (compute_mode = mode; compute_mode != VOIDmode;
3945
       compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3946
    if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3947
        || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3948
      break;
3949
 
3950
  if (compute_mode == VOIDmode)
3951
    for (compute_mode = mode; compute_mode != VOIDmode;
3952
         compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3953
      if (optab_libfunc (optab1, compute_mode)
3954
          || optab_libfunc (optab2, compute_mode))
3955
        break;
3956
 
3957
  /* If we still couldn't find a mode, use MODE, but expand_binop will
     probably die.  */
  if (compute_mode == VOIDmode)
    compute_mode = mode;

  if (target && GET_MODE (target) == compute_mode)
    tquotient = target;
  else
    tquotient = gen_reg_rtx (compute_mode);

  size = GET_MODE_BITSIZE (compute_mode);
#if 0
  /* It should be possible to restrict the precision to GET_MODE_BITSIZE
     (mode), and thereby get better code when OP1 is a constant.  Do that
     later.  It will require going over all usages of SIZE below.  */
  size = GET_MODE_BITSIZE (mode);
#endif

  /* Only deduct something for a REM if the last divide done was
     for a different constant.  Then set the constant of the last
     divide.  */
  max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
  if (rem_flag && ! (last_div_const != 0 && op1_is_constant
                     && INTVAL (op1) == last_div_const))
    max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];

  last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;

  /* Now convert to the best mode to use.  */
  if (compute_mode != mode)
    {
      op0 = convert_modes (compute_mode, mode, op0, unsignedp);
      op1 = convert_modes (compute_mode, mode, op1, unsignedp);

      /* convert_modes may have placed op1 into a register, so we
         must recompute the following.  */
      op1_is_constant = CONST_INT_P (op1);
      op1_is_pow2 = (op1_is_constant
                     && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
                          || (! unsignedp
                              && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
    }

  /* If one of the operands is a volatile MEM, copy it into a register.  */

  if (MEM_P (op0) && MEM_VOLATILE_P (op0))
    op0 = force_reg (compute_mode, op0);
  if (MEM_P (op1) && MEM_VOLATILE_P (op1))
    op1 = force_reg (compute_mode, op1);

  /* If we need the remainder or if OP1 is constant, we need to
     put OP0 in a register in case it has any queued subexpressions.  */
  if (rem_flag || op1_is_constant)
    op0 = force_reg (compute_mode, op0);

  last = get_last_insn ();

  /* Promote floor rounding to trunc rounding for unsigned operations.  */
  if (unsignedp)
    {
      if (code == FLOOR_DIV_EXPR)
        code = TRUNC_DIV_EXPR;
      if (code == FLOOR_MOD_EXPR)
        code = TRUNC_MOD_EXPR;
      if (code == EXACT_DIV_EXPR && op1_is_pow2)
        code = TRUNC_DIV_EXPR;
    }

  if (op1 != const0_rtx)
    switch (code)
      {
      case TRUNC_MOD_EXPR:
      case TRUNC_DIV_EXPR:
        if (op1_is_constant)
          {
            if (unsignedp)
              {
                unsigned HOST_WIDE_INT mh;
                int pre_shift, post_shift;
                int dummy;
                rtx ml;
                unsigned HOST_WIDE_INT d = (INTVAL (op1)
                                            & GET_MODE_MASK (compute_mode));

                if (EXACT_POWER_OF_2_OR_ZERO_P (d))
                  {
                    pre_shift = floor_log2 (d);
                    if (rem_flag)
                      {
                        remainder
                          = expand_binop (compute_mode, and_optab, op0,
                                          GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
                                          remainder, 1,
                                          OPTAB_LIB_WIDEN);
                        if (remainder)
                          return gen_lowpart (mode, remainder);
                      }
                    quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
                                             pre_shift, tquotient, 1);
                  }
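                /* Illustrative example: for d == 8 the remainder is
                   simply op0 & 7 and the quotient op0 >> 3, with no
                   division emitted at all.  */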
                else if (size <= HOST_BITS_PER_WIDE_INT)
                  {
                    if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
                      {
                        /* Most significant bit of divisor is set; emit an scc
                           insn.  */
                        quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
                                                          compute_mode, 1, 1);
                      }
                    else
                      {
                        /* Find a suitable multiplier and right shift count
                           instead of multiplying with D.  */

                        mh = choose_multiplier (d, size, size,
                                                &ml, &post_shift, &dummy);
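                        /* Illustrative 32-bit example: for d == 5 this
                           yields ml == 0xCCCCCCCD, post_shift == 2 and
                           mh == 0, giving q = mulhi (op0, ml) >> 2.  For
                           d == 7 the exact multiplier needs 33 bits, so
                           mh != 0 and the add/shift sequence below is
                           used instead.  */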

                        /* If the suggested multiplier is more than SIZE bits,
                           we can do better for even divisors, using an
                           initial right shift.  */
                        if (mh != 0 && (d & 1) == 0)
                          {
                            pre_shift = floor_log2 (d & -d);
                            mh = choose_multiplier (d >> pre_shift, size,
                                                    size - pre_shift,
                                                    &ml, &post_shift, &dummy);
                            gcc_assert (!mh);
                          }
                        else
                          pre_shift = 0;

                        if (mh != 0)
                          {
                            rtx t1, t2, t3, t4;

                            if (post_shift - 1 >= BITS_PER_WORD)
                              goto fail1;

                            extra_cost
                              = (shift_cost[speed][compute_mode][post_shift - 1]
                                 + shift_cost[speed][compute_mode][1]
                                 + 2 * add_cost[speed][compute_mode]);
                            t1 = expand_mult_highpart (compute_mode, op0, ml,
                                                       NULL_RTX, 1,
                                                       max_cost - extra_cost);
                            if (t1 == 0)
                              goto fail1;
                            t2 = force_operand (gen_rtx_MINUS (compute_mode,
                                                               op0, t1),
                                                NULL_RTX);
                            t3 = expand_shift (RSHIFT_EXPR, compute_mode,
                                               t2, 1, NULL_RTX, 1);
                            t4 = force_operand (gen_rtx_PLUS (compute_mode,
                                                              t1, t3),
                                                NULL_RTX);
                            quotient = expand_shift
                              (RSHIFT_EXPR, compute_mode, t4,
                               post_shift - 1, tquotient, 1);
                          }
                        else
                          {
                            rtx t1, t2;

                            if (pre_shift >= BITS_PER_WORD
                                || post_shift >= BITS_PER_WORD)
                              goto fail1;

                            t1 = expand_shift
                              (RSHIFT_EXPR, compute_mode, op0,
                               pre_shift, NULL_RTX, 1);
                            extra_cost
                              = (shift_cost[speed][compute_mode][pre_shift]
                                 + shift_cost[speed][compute_mode][post_shift]);
                            t2 = expand_mult_highpart (compute_mode, t1, ml,
                                                       NULL_RTX, 1,
                                                       max_cost - extra_cost);
                            if (t2 == 0)
                              goto fail1;
                            quotient = expand_shift
                              (RSHIFT_EXPR, compute_mode, t2,
                               post_shift, tquotient, 1);
                          }
                      }
                  }
                else            /* Too wide mode to use tricky code */
                  break;

                insn = get_last_insn ();
                if (insn != last)
                  set_dst_reg_note (insn, REG_EQUAL,
                                    gen_rtx_UDIV (compute_mode, op0, op1),
                                    quotient);
              }
            else                /* TRUNC_DIV, signed */
              {
                unsigned HOST_WIDE_INT ml;
                int lgup, post_shift;
                rtx mlr;
                HOST_WIDE_INT d = INTVAL (op1);
                unsigned HOST_WIDE_INT abs_d;

                /* Since d might be INT_MIN, we have to cast to
                   unsigned HOST_WIDE_INT before negating to avoid
                   undefined signed overflow.  */
                abs_d = (d >= 0
                         ? (unsigned HOST_WIDE_INT) d
                         : - (unsigned HOST_WIDE_INT) d);

                /* n rem d = n rem -d */
                if (rem_flag && d < 0)
                  {
                    d = abs_d;
                    op1 = gen_int_mode (abs_d, compute_mode);
                  }

                if (d == 1)
                  quotient = op0;
                else if (d == -1)
                  quotient = expand_unop (compute_mode, neg_optab, op0,
                                          tquotient, 0);
                else if (HOST_BITS_PER_WIDE_INT >= size
                         && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
                  {
                    /* This case is not handled correctly below.  */
                    quotient = emit_store_flag (tquotient, EQ, op0, op1,
                                                compute_mode, 1, 1);
                    if (quotient == 0)
                      goto fail1;
                  }
                else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
                         && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
                                      : sdiv_pow2_cheap[speed][compute_mode])
                         /* We assume that cheap metric is true if the
                            optab has an expander for this mode.  */
                         && ((optab_handler ((rem_flag ? smod_optab
                                              : sdiv_optab),
                                             compute_mode)
                              != CODE_FOR_nothing)
                             || (optab_handler (sdivmod_optab,
                                                compute_mode)
                                 != CODE_FOR_nothing)))
                  ;
                else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
                  {
                    if (rem_flag)
                      {
                        remainder = expand_smod_pow2 (compute_mode, op0, d);
                        if (remainder)
                          return gen_lowpart (mode, remainder);
                      }

                    if (sdiv_pow2_cheap[speed][compute_mode]
                        && ((optab_handler (sdiv_optab, compute_mode)
                             != CODE_FOR_nothing)
                            || (optab_handler (sdivmod_optab, compute_mode)
                                != CODE_FOR_nothing)))
                      quotient = expand_divmod (0, TRUNC_DIV_EXPR,
                                                compute_mode, op0,
                                                gen_int_mode (abs_d,
                                                              compute_mode),
                                                NULL_RTX, 0);
                    else
                      quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
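                    /* A plain arithmetic shift would compute FLOOR_DIV
                       (e.g. -7 >> 2 == -2), so the power-of-2 expansion
                       first biases negative operands by abs_d - 1:
                       (-7 + 3) >> 2 == -1, the truncating quotient.  */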

                    /* We have computed OP0 / abs(OP1).  If OP1 is negative,
                       negate the quotient.  */
                    if (d < 0)
                      {
                        insn = get_last_insn ();
                        if (insn != last
                            && abs_d < ((unsigned HOST_WIDE_INT) 1
                                        << (HOST_BITS_PER_WIDE_INT - 1)))
                          set_dst_reg_note (insn, REG_EQUAL,
                                            gen_rtx_DIV (compute_mode, op0,
                                                         gen_int_mode
                                                           (abs_d,
                                                            compute_mode)),
                                            quotient);

                        quotient = expand_unop (compute_mode, neg_optab,
                                                quotient, quotient, 0);
                      }
                  }
                else if (size <= HOST_BITS_PER_WIDE_INT)
                  {
                    choose_multiplier (abs_d, size, size - 1,
                                       &mlr, &post_shift, &lgup);
                    ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
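                    /* Illustrative 32-bit example: for abs_d == 3 this
                       gives ml == 0x55555556 with post_shift == 0, and
                       the first sequence below computes
                       mulhi (op0, ml) - (op0 >> 31); for abs_d == 7 the
                       multiplier does not fit in SIZE - 1 bits, so the
                       second sequence adds op0 back in after the
                       high-part multiply.  */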
                    if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
                      {
                        rtx t1, t2, t3;

                        if (post_shift >= BITS_PER_WORD
                            || size - 1 >= BITS_PER_WORD)
                          goto fail1;

                        extra_cost = (shift_cost[speed][compute_mode][post_shift]
                                      + shift_cost[speed][compute_mode][size - 1]
                                      + add_cost[speed][compute_mode]);
                        t1 = expand_mult_highpart (compute_mode, op0, mlr,
                                                   NULL_RTX, 0,
                                                   max_cost - extra_cost);
                        if (t1 == 0)
                          goto fail1;
                        t2 = expand_shift
                          (RSHIFT_EXPR, compute_mode, t1,
                           post_shift, NULL_RTX, 0);
                        t3 = expand_shift
                          (RSHIFT_EXPR, compute_mode, op0,
                           size - 1, NULL_RTX, 0);
                        if (d < 0)
                          quotient
                            = force_operand (gen_rtx_MINUS (compute_mode,
                                                            t3, t2),
                                             tquotient);
                        else
                          quotient
                            = force_operand (gen_rtx_MINUS (compute_mode,
                                                            t2, t3),
                                             tquotient);
                      }
                    else
                      {
                        rtx t1, t2, t3, t4;

                        if (post_shift >= BITS_PER_WORD
                            || size - 1 >= BITS_PER_WORD)
                          goto fail1;

                        ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
                        mlr = gen_int_mode (ml, compute_mode);
                        extra_cost = (shift_cost[speed][compute_mode][post_shift]
                                      + shift_cost[speed][compute_mode][size - 1]
                                      + 2 * add_cost[speed][compute_mode]);
                        t1 = expand_mult_highpart (compute_mode, op0, mlr,
                                                   NULL_RTX, 0,
                                                   max_cost - extra_cost);
                        if (t1 == 0)
                          goto fail1;
                        t2 = force_operand (gen_rtx_PLUS (compute_mode,
                                                          t1, op0),
                                            NULL_RTX);
                        t3 = expand_shift
                          (RSHIFT_EXPR, compute_mode, t2,
                           post_shift, NULL_RTX, 0);
                        t4 = expand_shift
                          (RSHIFT_EXPR, compute_mode, op0,
                           size - 1, NULL_RTX, 0);
                        if (d < 0)
                          quotient
                            = force_operand (gen_rtx_MINUS (compute_mode,
                                                            t4, t3),
                                             tquotient);
                        else
                          quotient
                            = force_operand (gen_rtx_MINUS (compute_mode,
                                                            t3, t4),
                                             tquotient);
                      }
                  }
                else            /* Too wide mode to use tricky code */
                  break;

                insn = get_last_insn ();
                if (insn != last)
                  set_dst_reg_note (insn, REG_EQUAL,
                                    gen_rtx_DIV (compute_mode, op0, op1),
                                    quotient);
              }
            break;
          }
      fail1:
        delete_insns_since (last);
        break;

      case FLOOR_DIV_EXPR:
      case FLOOR_MOD_EXPR:
      /* We will come here only for signed operations.  */
        if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
          {
            unsigned HOST_WIDE_INT mh;
            int pre_shift, lgup, post_shift;
            HOST_WIDE_INT d = INTVAL (op1);
            rtx ml;

            if (d > 0)
              {
                /* We could just as easily deal with negative constants here,
                   but it does not seem worth the trouble for GCC 2.6.  */
                if (EXACT_POWER_OF_2_OR_ZERO_P (d))
                  {
                    pre_shift = floor_log2 (d);
                    if (rem_flag)
                      {
                        remainder = expand_binop (compute_mode, and_optab, op0,
                                                  GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
                                                  remainder, 0, OPTAB_LIB_WIDEN);
                        if (remainder)
                          return gen_lowpart (mode, remainder);
                      }
                    quotient = expand_shift
                      (RSHIFT_EXPR, compute_mode, op0,
                       pre_shift, tquotient, 0);
                  }
                else
                  {
                    rtx t1, t2, t3, t4;

                    mh = choose_multiplier (d, size, size - 1,
                                            &ml, &post_shift, &lgup);
                    gcc_assert (!mh);

                    if (post_shift < BITS_PER_WORD
                        && size - 1 < BITS_PER_WORD)
                      {
                        t1 = expand_shift
                          (RSHIFT_EXPR, compute_mode, op0,
                           size - 1, NULL_RTX, 0);
                        t2 = expand_binop (compute_mode, xor_optab, op0, t1,
                                           NULL_RTX, 0, OPTAB_WIDEN);
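                        /* T2 is now ~op0 when op0 is negative and op0
                           itself otherwise, so the unsigned high-part
                           multiply below plus the final XOR with T1
                           yields the floor quotient: e.g. op0 == -7,
                           d == 3 gives t2 == 6, 6 / 3 == 2, and
                           2 ^ -1 == -3 == floor (-7 / 3).  */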
                        extra_cost = (shift_cost[speed][compute_mode][post_shift]
                                      + shift_cost[speed][compute_mode][size - 1]
                                      + 2 * add_cost[speed][compute_mode]);
                        t3 = expand_mult_highpart (compute_mode, t2, ml,
                                                   NULL_RTX, 1,
                                                   max_cost - extra_cost);
                        if (t3 != 0)
                          {
                            t4 = expand_shift
                              (RSHIFT_EXPR, compute_mode, t3,
                               post_shift, NULL_RTX, 1);
                            quotient = expand_binop (compute_mode, xor_optab,
                                                     t4, t1, tquotient, 0,
                                                     OPTAB_WIDEN);
                          }
                      }
                  }
              }
            else
              {
                rtx nsign, t1, t2, t3, t4;
                t1 = force_operand (gen_rtx_PLUS (compute_mode,
                                                  op0, constm1_rtx), NULL_RTX);
                t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
                                   0, OPTAB_WIDEN);
                nsign = expand_shift
                  (RSHIFT_EXPR, compute_mode, t2,
                   size - 1, NULL_RTX, 0);
                t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
                                    NULL_RTX);
                t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
                                    NULL_RTX, 0);
                if (t4)
                  {
                    rtx t5;
                    t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
                                      NULL_RTX, 0);
                    quotient = force_operand (gen_rtx_PLUS (compute_mode,
                                                            t4, t5),
                                              tquotient);
                  }
              }
          }

        if (quotient != 0)
          break;
        delete_insns_since (last);

        /* Try using an instruction that produces both the quotient and
           remainder, using truncation.  We can easily compensate the quotient
           or remainder to get floor rounding, once we have the remainder.
           Notice that we compute also the final remainder value here,
           and return the result right away.  */
        if (target == 0 || GET_MODE (target) != compute_mode)
          target = gen_reg_rtx (compute_mode);

        if (rem_flag)
          {
            remainder
              = REG_P (target) ? target : gen_reg_rtx (compute_mode);
            quotient = gen_reg_rtx (compute_mode);
          }
        else
          {
            quotient
              = REG_P (target) ? target : gen_reg_rtx (compute_mode);
            remainder = gen_reg_rtx (compute_mode);
          }

        if (expand_twoval_binop (sdivmod_optab, op0, op1,
                                 quotient, remainder, 0))
          {
            /* This could be computed with a branch-less sequence.
               Save that for later.  */
            rtx tem;
            rtx label = gen_label_rtx ();
            do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
            tem = expand_binop (compute_mode, xor_optab, op0, op1,
                                NULL_RTX, 0, OPTAB_WIDEN);
            do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
            expand_dec (quotient, const1_rtx);
            expand_inc (remainder, op1);
            emit_label (label);
            return gen_lowpart (mode, rem_flag ? remainder : quotient);
          }
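        /* The adjustment above turns the truncating pair into the floor
           pair when the remainder is nonzero and the signs differ:
           e.g. -7 / 2 truncates to q == -3, r == -1, which is fixed up
           to q == -4, r == 1.  */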

        /* No luck with division elimination or divmod.  Have to do it
           by conditionally adjusting op0 *and* the result.  */
        {
          rtx label1, label2, label3, label4, label5;
          rtx adjusted_op0;
          rtx tem;

          quotient = gen_reg_rtx (compute_mode);
          adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
          label1 = gen_label_rtx ();
          label2 = gen_label_rtx ();
          label3 = gen_label_rtx ();
          label4 = gen_label_rtx ();
          label5 = gen_label_rtx ();
          do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
          do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
          tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
                              quotient, 0, OPTAB_LIB_WIDEN);
          if (tem != quotient)
            emit_move_insn (quotient, tem);
          emit_jump_insn (gen_jump (label5));
          emit_barrier ();
          emit_label (label1);
          expand_inc (adjusted_op0, const1_rtx);
          emit_jump_insn (gen_jump (label4));
          emit_barrier ();
          emit_label (label2);
          do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
          tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
                              quotient, 0, OPTAB_LIB_WIDEN);
          if (tem != quotient)
            emit_move_insn (quotient, tem);
          emit_jump_insn (gen_jump (label5));
          emit_barrier ();
          emit_label (label3);
          expand_dec (adjusted_op0, const1_rtx);
          emit_label (label4);
          tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
                              quotient, 0, OPTAB_LIB_WIDEN);
          if (tem != quotient)
            emit_move_insn (quotient, tem);
          expand_dec (quotient, const1_rtx);
          emit_label (label5);
        }
        break;

      case CEIL_DIV_EXPR:
      case CEIL_MOD_EXPR:
        if (unsignedp)
          {
            if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
              {
                rtx t1, t2, t3;
                unsigned HOST_WIDE_INT d = INTVAL (op1);
                t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
                                   floor_log2 (d), tquotient, 1);
                t2 = expand_binop (compute_mode, and_optab, op0,
                                   GEN_INT (d - 1),
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
                t3 = gen_reg_rtx (compute_mode);
                t3 = emit_store_flag (t3, NE, t2, const0_rtx,
                                      compute_mode, 1, 1);
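                /* T3 is 1 exactly when some low bits of op0 were shifted
                   out, so T1 + T3 is the ceiling quotient: e.g. 13 / 4
                   gives t1 == 3, t2 == 1, t3 == 1, quotient 4.  */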
                if (t3 == 0)
                  {
                    rtx lab;
                    lab = gen_label_rtx ();
                    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
                    expand_inc (t1, const1_rtx);
                    emit_label (lab);
                    quotient = t1;
                  }
                else
                  quotient = force_operand (gen_rtx_PLUS (compute_mode,
                                                          t1, t3),
                                            tquotient);
                break;
              }

            /* Try using an instruction that produces both the quotient and
               remainder, using truncation.  We can easily compensate the
               quotient or remainder to get ceiling rounding, once we have the
               remainder.  Notice that we compute also the final remainder
               value here, and return the result right away.  */
            if (target == 0 || GET_MODE (target) != compute_mode)
              target = gen_reg_rtx (compute_mode);

            if (rem_flag)
              {
                remainder = (REG_P (target)
                             ? target : gen_reg_rtx (compute_mode));
                quotient = gen_reg_rtx (compute_mode);
              }
            else
              {
                quotient = (REG_P (target)
                            ? target : gen_reg_rtx (compute_mode));
                remainder = gen_reg_rtx (compute_mode);
              }

            if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
                                     remainder, 1))
              {
                /* This could be computed with a branch-less sequence.
                   Save that for later.  */
                rtx label = gen_label_rtx ();
                do_cmp_and_jump (remainder, const0_rtx, EQ,
                                 compute_mode, label);
                expand_inc (quotient, const1_rtx);
                expand_dec (remainder, op1);
                emit_label (label);
                return gen_lowpart (mode, rem_flag ? remainder : quotient);
              }

            /* No luck with division elimination or divmod.  Have to do it
               by conditionally adjusting op0 *and* the result.  */
            {
              rtx label1, label2;
              rtx adjusted_op0, tem;

              quotient = gen_reg_rtx (compute_mode);
              adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
              label1 = gen_label_rtx ();
              label2 = gen_label_rtx ();
              do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
                               compute_mode, label1);
              emit_move_insn (quotient, const0_rtx);
              emit_jump_insn (gen_jump (label2));
              emit_barrier ();
              emit_label (label1);
              expand_dec (adjusted_op0, const1_rtx);
              tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
                                  quotient, 1, OPTAB_LIB_WIDEN);
              if (tem != quotient)
                emit_move_insn (quotient, tem);
              expand_inc (quotient, const1_rtx);
              emit_label (label2);
            }
          }
        else /* signed */
          {
            if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
                && INTVAL (op1) >= 0)
              {
                /* This is extremely similar to the code for the unsigned case
                   above.  For 2.7 we should merge these variants, but for
                   2.6.1 I don't want to touch the code for unsigned since that
                   gets used in C.  The signed case will only be used by other
                   languages (Ada).  */

                rtx t1, t2, t3;
                unsigned HOST_WIDE_INT d = INTVAL (op1);
                t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
                                   floor_log2 (d), tquotient, 0);
                t2 = expand_binop (compute_mode, and_optab, op0,
                                   GEN_INT (d - 1),
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
                t3 = gen_reg_rtx (compute_mode);
                t3 = emit_store_flag (t3, NE, t2, const0_rtx,
                                      compute_mode, 1, 1);
                if (t3 == 0)
                  {
                    rtx lab;
                    lab = gen_label_rtx ();
                    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
                    expand_inc (t1, const1_rtx);
                    emit_label (lab);
                    quotient = t1;
                  }
                else
                  quotient = force_operand (gen_rtx_PLUS (compute_mode,
                                                          t1, t3),
                                            tquotient);
                break;
              }

            /* Try using an instruction that produces both the quotient and
               remainder, using truncation.  We can easily compensate the
               quotient or remainder to get ceiling rounding, once we have the
               remainder.  Notice that we compute also the final remainder
               value here, and return the result right away.  */
            if (target == 0 || GET_MODE (target) != compute_mode)
              target = gen_reg_rtx (compute_mode);
            if (rem_flag)
              {
                remainder = (REG_P (target)
                             ? target : gen_reg_rtx (compute_mode));
                quotient = gen_reg_rtx (compute_mode);
              }
            else
              {
                quotient = (REG_P (target)
                            ? target : gen_reg_rtx (compute_mode));
                remainder = gen_reg_rtx (compute_mode);
              }

            if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
                                     remainder, 0))
              {
                /* This could be computed with a branch-less sequence.
                   Save that for later.  */
                rtx tem;
                rtx label = gen_label_rtx ();
                do_cmp_and_jump (remainder, const0_rtx, EQ,
                                 compute_mode, label);
                tem = expand_binop (compute_mode, xor_optab, op0, op1,
                                    NULL_RTX, 0, OPTAB_WIDEN);
                do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
                expand_inc (quotient, const1_rtx);
                expand_dec (remainder, op1);
                emit_label (label);
                return gen_lowpart (mode, rem_flag ? remainder : quotient);
              }

            /* No luck with division elimination or divmod.  Have to do it
               by conditionally adjusting op0 *and* the result.  */
            {
              rtx label1, label2, label3, label4, label5;
              rtx adjusted_op0;
              rtx tem;

              quotient = gen_reg_rtx (compute_mode);
              adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
              label1 = gen_label_rtx ();
              label2 = gen_label_rtx ();
              label3 = gen_label_rtx ();
              label4 = gen_label_rtx ();
              label5 = gen_label_rtx ();
              do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
              do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
                               compute_mode, label1);
              tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
                                  quotient, 0, OPTAB_LIB_WIDEN);
              if (tem != quotient)
                emit_move_insn (quotient, tem);
              emit_jump_insn (gen_jump (label5));
              emit_barrier ();
              emit_label (label1);
              expand_dec (adjusted_op0, const1_rtx);
              emit_jump_insn (gen_jump (label4));
              emit_barrier ();
              emit_label (label2);
              do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
                               compute_mode, label3);
              tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
                                  quotient, 0, OPTAB_LIB_WIDEN);
              if (tem != quotient)
                emit_move_insn (quotient, tem);
              emit_jump_insn (gen_jump (label5));
              emit_barrier ();
              emit_label (label3);
              expand_inc (adjusted_op0, const1_rtx);
              emit_label (label4);
              tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
                                  quotient, 0, OPTAB_LIB_WIDEN);
              if (tem != quotient)
                emit_move_insn (quotient, tem);
              expand_inc (quotient, const1_rtx);
              emit_label (label5);
            }
          }
        break;

      case EXACT_DIV_EXPR:
        if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
          {
            HOST_WIDE_INT d = INTVAL (op1);
            unsigned HOST_WIDE_INT ml;
            int pre_shift;
            rtx t1;

            pre_shift = floor_log2 (d & -d);
            ml = invert_mod2n (d >> pre_shift, size);
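            /* ML is the multiplicative inverse of the odd part of D
               modulo 2**SIZE, so for a division known to be exact the
               multiply below recovers the quotient: e.g. with size == 32
               and d == 3, ml == 0xAAAAAAAB and 21 * 0xAAAAAAAB == 7
               (mod 2**32).  */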
            t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
                               pre_shift, NULL_RTX, unsignedp);
            quotient = expand_mult (compute_mode, t1,
                                    gen_int_mode (ml, compute_mode),
                                    NULL_RTX, 1);

            insn = get_last_insn ();
            set_dst_reg_note (insn, REG_EQUAL,
                              gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
                                              compute_mode, op0, op1),
                              quotient);
          }
        break;

      case ROUND_DIV_EXPR:
      case ROUND_MOD_EXPR:
        if (unsignedp)
          {
            rtx tem;
            rtx label;
            label = gen_label_rtx ();
            quotient = gen_reg_rtx (compute_mode);
            remainder = gen_reg_rtx (compute_mode);
            if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
              {
                rtx tem;
                quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
                                         quotient, 1, OPTAB_LIB_WIDEN);
                tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
                remainder = expand_binop (compute_mode, sub_optab, op0, tem,
                                          remainder, 1, OPTAB_LIB_WIDEN);
              }
            tem = plus_constant (op1, -1);
            tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
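            /* TEM is now floor ((d - 1) / 2), the largest remainder that
               still rounds the quotient down: e.g. 7 / 2 leaves
               remainder 1 > 0, so quotient 3 is bumped to 4, while 7 / 3
               (remainder 1 <= 1) keeps quotient 2.  */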
            do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
            expand_inc (quotient, const1_rtx);
            expand_dec (remainder, op1);
            emit_label (label);
          }
        else
          {
            rtx abs_rem, abs_op1, tem, mask;
            rtx label;
            label = gen_label_rtx ();
            quotient = gen_reg_rtx (compute_mode);
            remainder = gen_reg_rtx (compute_mode);
            if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
              {
                rtx tem;
                quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
                                         quotient, 0, OPTAB_LIB_WIDEN);
                tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
                remainder = expand_binop (compute_mode, sub_optab, op0, tem,
                                          remainder, 0, OPTAB_LIB_WIDEN);
              }
            abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
            abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
            tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
                                1, NULL_RTX, 1);
            do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
            tem = expand_binop (compute_mode, xor_optab, op0, op1,
                                NULL_RTX, 0, OPTAB_WIDEN);
            mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
                                 size - 1, NULL_RTX, 0);
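            /* MASK is -1 when the operands' signs differ and 0
               otherwise, so (mask ^ 1) - mask below is a branch-free
               -1 or +1 quotient adjustment, and (mask ^ op1) - mask
               is op1 conditionally negated.  */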
            tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
                                NULL_RTX, 0, OPTAB_WIDEN);
            tem = expand_binop (compute_mode, sub_optab, tem, mask,
                                NULL_RTX, 0, OPTAB_WIDEN);
            expand_inc (quotient, tem);
            tem = expand_binop (compute_mode, xor_optab, mask, op1,
                                NULL_RTX, 0, OPTAB_WIDEN);
            tem = expand_binop (compute_mode, sub_optab, tem, mask,
                                NULL_RTX, 0, OPTAB_WIDEN);
            expand_dec (remainder, tem);
            emit_label (label);
          }
        return gen_lowpart (mode, rem_flag ? remainder : quotient);

      default:
        gcc_unreachable ();
      }

  if (quotient == 0)
    {
      if (target && GET_MODE (target) != compute_mode)
        target = 0;

      if (rem_flag)
        {
          /* Try to produce the remainder without producing the quotient.
             If we seem to have a divmod pattern that does not require widening,
             don't try widening here.  We should really have a WIDEN argument
             to expand_twoval_binop, since what we'd really like to do here is
             1) try a mod insn in compute_mode
             2) try a divmod insn in compute_mode
             3) try a div insn in compute_mode and multiply-subtract to get
                remainder
             4) try the same things with widening allowed.  */
          remainder
            = sign_expand_binop (compute_mode, umod_optab, smod_optab,
                                 op0, op1, target,
                                 unsignedp,
                                 ((optab_handler (optab2, compute_mode)
                                   != CODE_FOR_nothing)
                                  ? OPTAB_DIRECT : OPTAB_WIDEN));
          if (remainder == 0)
            {
              /* No luck there.  Can we do remainder and divide at once
                 without a library call?  */
              remainder = gen_reg_rtx (compute_mode);
              if (! expand_twoval_binop ((unsignedp
                                          ? udivmod_optab
                                          : sdivmod_optab),
                                         op0, op1,
                                         NULL_RTX, remainder, unsignedp))
                remainder = 0;
            }

          if (remainder)
            return gen_lowpart (mode, remainder);
        }

      /* Produce the quotient.  Try a quotient insn, but not a library call.
         If we have a divmod in this mode, use it in preference to widening
         the div (for this test we assume it will not fail). Note that optab2
         is set to the one of the two optabs that the call below will use.  */
      quotient
        = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
                             op0, op1, rem_flag ? NULL_RTX : target,
                             unsignedp,
                             ((optab_handler (optab2, compute_mode)
                               != CODE_FOR_nothing)
                              ? OPTAB_DIRECT : OPTAB_WIDEN));

      if (quotient == 0)
        {
          /* No luck there.  Try a quotient-and-remainder insn,
             keeping the quotient alone.  */
          quotient = gen_reg_rtx (compute_mode);
          if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
                                     op0, op1,
                                     quotient, NULL_RTX, unsignedp))
            {
              quotient = 0;
              if (! rem_flag)
                /* Still no luck.  If we are not computing the remainder,
                   use a library call for the quotient.  */
                quotient = sign_expand_binop (compute_mode,
                                              udiv_optab, sdiv_optab,
                                              op0, op1, target,
                                              unsignedp, OPTAB_LIB_WIDEN);
            }
        }
    }

  if (rem_flag)
    {
      if (target && GET_MODE (target) != compute_mode)
        target = 0;

      if (quotient == 0)
        {
          /* No divide instruction either.  Use library for remainder.  */
          remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
                                         op0, op1, target,
                                         unsignedp, OPTAB_LIB_WIDEN);
          /* No remainder function.  Try a quotient-and-remainder
             function, keeping the remainder.  */
          if (!remainder)
            {
              remainder = gen_reg_rtx (compute_mode);
              if (!expand_twoval_binop_libfunc
                  (unsignedp ? udivmod_optab : sdivmod_optab,
                   op0, op1,
                   NULL_RTX, remainder,
                   unsignedp ? UMOD : MOD))
                remainder = NULL_RTX;
            }
        }
      else
        {
          /* We divided.  Now finish doing X - Y * (X / Y).  */
          remainder = expand_mult (compute_mode, quotient, op1,
                                   NULL_RTX, unsignedp);
          remainder = expand_binop (compute_mode, sub_optab, op0,
                                    remainder, target, unsignedp,
                                    OPTAB_LIB_WIDEN);
        }
    }

  return gen_lowpart (mode, rem_flag ? remainder : quotient);
}
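
/* A self-contained sketch, kept disabled, of the unsigned
   multiply-highpart division that the TRUNC_DIV_EXPR code above emits
   for a constant divisor, specialized to d == 5 on a 32-bit machine;
   the function name and types are illustrative only.  */
#if 0
static unsigned int
udiv5_sketch (unsigned int n)
{
  /* 0xCCCCCCCD is ceil (2**34 / 5), so the high 32 bits of the 64-bit
     product, shifted right two more bits, give floor (n / 5).  */
  unsigned long long prod = (unsigned long long) n * 0xCCCCCCCDu;
  return (unsigned int) (prod >> 32) >> 2;
}
#endif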
4927
 
4928
/* Return a tree node with data type TYPE, describing the value of X.
4929
   Usually this is an VAR_DECL, if there is no obvious better choice.
4930
   X may be an expression, however we only support those expressions
4931
   generated by loop.c.  */
4932
 
4933
tree
4934
make_tree (tree type, rtx x)
4935
{
4936
  tree t;
4937
 
4938
  switch (GET_CODE (x))
4939
    {
4940
    case CONST_INT:
4941
      {
4942
        HOST_WIDE_INT hi = 0;
4943
 
4944
        if (INTVAL (x) < 0
4945
            && !(TYPE_UNSIGNED (type)
4946
                 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4947
                     < HOST_BITS_PER_WIDE_INT)))
4948
          hi = -1;
4949
 
4950
        t = build_int_cst_wide (type, INTVAL (x), hi);
4951
 
4952
        return t;
4953
      }
4954
 
4955
    case CONST_DOUBLE:
4956
      if (GET_MODE (x) == VOIDmode)
4957
        t = build_int_cst_wide (type,
4958
                                CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4959
      else
4960
        {
4961
          REAL_VALUE_TYPE d;
4962
 
4963
          REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4964
          t = build_real (type, d);
4965
        }
4966
 
4967
      return t;
4968
 
4969
    case CONST_VECTOR:
4970
      {
4971
        int units = CONST_VECTOR_NUNITS (x);
4972
        tree itype = TREE_TYPE (type);
4973
        tree t = NULL_TREE;
4974
        int i;
4975
 
4976
 
4977
        /* Build a tree with vector elements.  */
4978
        for (i = units - 1; i >= 0; --i)
4979
          {
4980
            rtx elt = CONST_VECTOR_ELT (x, i);
4981
            t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
4982
          }
4983
 
4984
        return build_vector (type, t);
4985
      }
4986
 
4987
    case PLUS:
4988
      return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4989
                          make_tree (type, XEXP (x, 1)));
4990
 
4991
    case MINUS:
4992
      return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4993
                          make_tree (type, XEXP (x, 1)));
4994
 
4995
    case NEG:
4996
      return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4997
 
4998
    case MULT:
4999
      return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5000
                          make_tree (type, XEXP (x, 1)));
5001
 
5002
    case ASHIFT:
5003
      return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5004
                          make_tree (type, XEXP (x, 1)));
5005
 
5006
    case LSHIFTRT:
5007
      t = unsigned_type_for (type);
5008
      return fold_convert (type, build2 (RSHIFT_EXPR, t,
5009
                                         make_tree (t, XEXP (x, 0)),
5010
                                         make_tree (type, XEXP (x, 1))));
5011
 
5012
    case ASHIFTRT:
5013
      t = signed_type_for (type);
5014
      return fold_convert (type, build2 (RSHIFT_EXPR, t,
5015
                                         make_tree (t, XEXP (x, 0)),
5016
                                         make_tree (type, XEXP (x, 1))));
5017
 
5018
    case DIV:
5019
      if (TREE_CODE (type) != REAL_TYPE)
5020
        t = signed_type_for (type);
5021
      else
5022
        t = type;
5023
 
5024
      return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5025
                                         make_tree (t, XEXP (x, 0)),
5026
                                         make_tree (t, XEXP (x, 1))));
5027
    case UDIV:
5028
      t = unsigned_type_for (type);
5029
      return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5030
                                         make_tree (t, XEXP (x, 0)),
5031
                                         make_tree (t, XEXP (x, 1))));
5032
 
5033
    case SIGN_EXTEND:
5034
    case ZERO_EXTEND:
5035
      t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5036
                                          GET_CODE (x) == ZERO_EXTEND);
5037
      return fold_convert (type, make_tree (t, XEXP (x, 0)));
5038
 
5039
    case CONST:
5040
      return make_tree (type, XEXP (x, 0));
5041
 
5042
    case SYMBOL_REF:
5043
      t = SYMBOL_REF_DECL (x);
5044
      if (t)
5045
        return fold_convert (type, build_fold_addr_expr (t));
5046
      /* else fall through.  */
5047
 
5048
    default:
5049
      t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5050
 
5051
      /* If TYPE is a POINTER_TYPE, we might need to convert X from
5052
         address mode to pointer mode.  */
5053
      if (POINTER_TYPE_P (type))
5054
        x = convert_memory_address_addr_space
5055
              (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5056
 
5057
      /* Note that we do *not* use SET_DECL_RTL here, because we do not
5058
         want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5059
      t->decl_with_rtl.rtl = x;
5060
 
5061
      return t;
5062
    }
5063
}
5064
 
5065
/* Compute the logical-and of OP0 and OP1, storing it in TARGET
5066
   and returning TARGET.
5067
 
5068
   If TARGET is 0, a pseudo-register or constant is returned.  */
5069
 
5070
rtx
5071
expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5072
{
5073
  rtx tem = 0;
5074
 
5075
  if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5076
    tem = simplify_binary_operation (AND, mode, op0, op1);
5077
  if (tem == 0)
5078
    tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5079
 
5080
  if (target == 0)
5081
    target = tem;
5082
  else if (tem != target)
5083
    emit_move_insn (target, tem);
5084
  return target;
5085
}
5086
 
5087
/* Helper function for emit_store_flag.  */
5088
static rtx
5089
emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5090
             enum machine_mode mode, enum machine_mode compare_mode,
5091
             int unsignedp, rtx x, rtx y, int normalizep,
5092
             enum machine_mode target_mode)
5093
{
5094
  struct expand_operand ops[4];
5095
  rtx op0, last, comparison, subtarget;
5096
  enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5097
 
5098
  last = get_last_insn ();
5099
  x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5100
  y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5101
  if (!x || !y)
5102
    {
5103
      delete_insns_since (last);
5104
      return NULL_RTX;
5105
    }
5106
 
5107
  if (target_mode == VOIDmode)
5108
    target_mode = result_mode;
5109
  if (!target)
5110
    target = gen_reg_rtx (target_mode);
5111
 
5112
  comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5113
 
5114
  create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5115
  create_fixed_operand (&ops[1], comparison);
5116
  create_fixed_operand (&ops[2], x);
5117
  create_fixed_operand (&ops[3], y);
5118
  if (!maybe_expand_insn (icode, 4, ops))
5119
    {
5120
      delete_insns_since (last);
5121
      return NULL_RTX;
5122
    }
5123
  subtarget = ops[0].value;
5124
 
5125
  /* If we are converting to a wider mode, first convert to
5126
     TARGET_MODE, then normalize.  This produces better combining
5127
     opportunities on machines that have a SIGN_EXTRACT when we are
5128
     testing a single bit.  This mostly benefits the 68k.
5129
 
5130
     If STORE_FLAG_VALUE does not have the sign bit set when
5131
     interpreted in MODE, we can do this conversion as unsigned, which
5132
     is usually more efficient.  */
5133
  if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5134
    {
5135
      convert_move (target, subtarget,
5136
                    val_signbit_known_clear_p (result_mode,
5137
                                               STORE_FLAG_VALUE));
5138
      op0 = target;
5139
      result_mode = target_mode;
5140
    }
5141
  else
5142
    op0 = subtarget;
5143
 
5144
  /* If we want to keep subexpressions around, don't reuse our last
5145
     target.  */
5146
  if (optimize)
5147
    subtarget = 0;
5148
 
5149
  /* Now normalize to the proper value in MODE.  Sometimes we don't
5150
     have to do anything.  */
5151
  if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5152
    ;
5153
  /* STORE_FLAG_VALUE might be the most negative number, so write
5154
     the comparison this way to avoid a compiler-time warning.  */
5155
  else if (- normalizep == STORE_FLAG_VALUE)
5156
    op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5157
 
5158
  /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5159
     it hard to use a value of just the sign bit due to ANSI integer
5160
     constant typing rules.  */
5161
  else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5162
    op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5163
                        GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5164
                        normalizep == 1);
5165
  else
5166
    {
5167
      gcc_assert (STORE_FLAG_VALUE & 1);
5168
 
5169
      op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5170
      if (normalizep == -1)
5171
        op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5172
    }
5173
 
5174
  /* If we were converting to a smaller mode, do the conversion now.  */
5175
  if (target_mode != result_mode)
5176
    {
5177
      convert_move (target, op0, 0);
5178
      return target;
5179
    }
5180
  else
5181
    return op0;
5182
}
5183
 
5184
 
5185
/* A subroutine of emit_store_flag only including "tricks" that do not
5186
   need a recursive call.  These are kept separate to avoid infinite
5187
   loops.  */
5188
 
5189
static rtx
5190
emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5191
                   enum machine_mode mode, int unsignedp, int normalizep,
5192
                   enum machine_mode target_mode)
5193
{
5194
  rtx subtarget;
5195
  enum insn_code icode;
5196
  enum machine_mode compare_mode;
5197
  enum mode_class mclass;
5198
  enum rtx_code scode;
5199
  rtx tem;
5200
 
5201
  if (unsignedp)
5202
    code = unsigned_condition (code);
5203
  scode = swap_condition (code);
5204
 
5205
  /* If one operand is constant, make it the second one.  Only do this
5206
     if the other operand is not constant as well.  */
5207
 
5208
  if (swap_commutative_operands_p (op0, op1))
5209
    {
5210
      tem = op0;
5211
      op0 = op1;
5212
      op1 = tem;
5213
      code = swap_condition (code);
5214
    }
5215
 
5216
  if (mode == VOIDmode)
5217
    mode = GET_MODE (op0);
5218
 
5219
  /* For some comparisons with 1 and -1, we can convert this to
5220
     comparisons with zero.  This will often produce more opportunities for
5221
     store-flag insns.  */
5222
 
5223
  switch (code)
5224
    {
5225
    case LT:
5226
      if (op1 == const1_rtx)
5227
        op1 = const0_rtx, code = LE;
5228
      break;
5229
    case LE:
5230
      if (op1 == constm1_rtx)
5231
        op1 = const0_rtx, code = LT;
5232
      break;
5233
    case GE:
5234
      if (op1 == const1_rtx)
5235
        op1 = const0_rtx, code = GT;
5236
      break;
5237
    case GT:
5238
      if (op1 == constm1_rtx)
5239
        op1 = const0_rtx, code = GE;
5240
      break;
5241
    case GEU:
5242
      if (op1 == const1_rtx)
5243
        op1 = const0_rtx, code = NE;
5244
      break;
5245
    case LTU:
5246
      if (op1 == const1_rtx)
5247
        op1 = const0_rtx, code = EQ;
5248
      break;
5249
    default:
5250
      break;
5251
    }

  /* If we are comparing a double-word integer with zero or -1, we can
     convert the comparison into one involving a single word.  */
  if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
      && GET_MODE_CLASS (mode) == MODE_INT
      && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
    {
      if ((code == EQ || code == NE)
          && (op1 == const0_rtx || op1 == constm1_rtx))
        {
          rtx op00, op01;

          /* Do a logical OR or AND of the two words and compare the
             result.  */
          op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
          op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
          tem = expand_binop (word_mode,
                              op1 == const0_rtx ? ior_optab : and_optab,
                              op00, op01, NULL_RTX, unsignedp,
                              OPTAB_DIRECT);

          if (tem != 0)
            tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
                                   unsignedp, normalizep);
        }
      else if ((code == LT || code == GE) && op1 == const0_rtx)
        {
          rtx op0h;

          /* If testing the sign bit, can just test on high word.  */
          op0h = simplify_gen_subreg (word_mode, op0, mode,
                                      subreg_highpart_offset (word_mode,
                                                              mode));
          tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
                                 unsignedp, normalizep);
        }
      else
        tem = NULL_RTX;

      if (tem)
        {
          if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
            return tem;
          if (!target)
            target = gen_reg_rtx (target_mode);

          convert_move (target, tem,
                        !val_signbit_known_set_p (word_mode,
                                                  (normalizep ? normalizep
                                                   : STORE_FLAG_VALUE)));
          return target;
        }
    }
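
  /* E.g. on a 32-bit target, a DImode "x == 0" becomes
     "(lowpart (x) | highpart (x)) == 0" in SImode, and "x == -1"
     becomes "(lowpart (x) & highpart (x)) == -1": a double word is
     zero (or all-ones) exactly when both of its words are.  */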

  /* If this is A < 0 or A >= 0, we can do this by taking the ones
     complement of A (for GE) and shifting the sign bit to the low bit.  */
  if (op1 == const0_rtx && (code == LT || code == GE)
      && GET_MODE_CLASS (mode) == MODE_INT
      && (normalizep || STORE_FLAG_VALUE == 1
          || val_signbit_p (mode, STORE_FLAG_VALUE)))
    {
      subtarget = target;

      if (!target)
        target_mode = mode;

      /* If the result is to be wider than OP0, it is best to convert it
         first.  If it is to be narrower, it is *incorrect* to convert it
         first.  */
      else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
        {
          op0 = convert_modes (target_mode, mode, op0, 0);
          mode = target_mode;
        }

      if (target_mode != mode)
        subtarget = 0;

      if (code == GE)
        op0 = expand_unop (mode, one_cmpl_optab, op0,
                           ((STORE_FLAG_VALUE == 1 || normalizep)
                            ? 0 : subtarget), 0);

      if (STORE_FLAG_VALUE == 1 || normalizep)
        /* If we are supposed to produce a 0/1 value, we want to do
           a logical shift from the sign bit to the low-order bit; for
           a -1/0 value, we do an arithmetic shift.  */
        op0 = expand_shift (RSHIFT_EXPR, mode, op0,
                            GET_MODE_BITSIZE (mode) - 1,
                            subtarget, normalizep != -1);

      if (mode != target_mode)
        op0 = convert_modes (target_mode, mode, op0, 0);

      return op0;
    }
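
  /* Illustration in an 8-bit mode: for "A < 0" with a 0/1 result,
     A = -5 = 0xfb and the logical shift 0xfb >> 7 yields 1, while
     A = 5 = 0x05 yields 0.  For "A >= 0" the one's complement flips
     the sign bit first, and for a -1/0 result the arithmetic shift
     smears the sign bit across the whole word.  */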

  mclass = GET_MODE_CLASS (mode);
  for (compare_mode = mode; compare_mode != VOIDmode;
       compare_mode = GET_MODE_WIDER_MODE (compare_mode))
    {
      enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
      icode = optab_handler (cstore_optab, optab_mode);
      if (icode != CODE_FOR_nothing)
        {
          do_pending_stack_adjust ();
          tem = emit_cstore (target, icode, code, mode, compare_mode,
                             unsignedp, op0, op1, normalizep, target_mode);
          if (tem)
            return tem;

          if (GET_MODE_CLASS (mode) == MODE_FLOAT)
            {
              tem = emit_cstore (target, icode, scode, mode, compare_mode,
                                 unsignedp, op1, op0, normalizep, target_mode);
              if (tem)
                return tem;
            }
          break;
        }
    }

  return 0;
}

/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
   and storing in TARGET.  Normally return TARGET.
   Return 0 if that cannot be done.

   MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
   it is VOIDmode, they cannot both be CONST_INT.

   UNSIGNEDP is for the case where we have to widen the operands
   to perform the operation.  It says to use zero-extension.

   NORMALIZEP is 1 if we should convert the result to be either zero
   or one.  NORMALIZEP is -1 if we should convert the result to be
   either zero or -1.  If NORMALIZEP is zero, the result will be left
   "raw" out of the scc insn.  */

rtx
emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
                 enum machine_mode mode, int unsignedp, int normalizep)
{
  enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
  enum rtx_code rcode;
  rtx subtarget;
  rtx tem, last, trueval;

  tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
                           target_mode);
  if (tem)
    return tem;

  /* If we reached here, we can't do this with a scc insn; however, there
     are some comparisons that can be done in other ways.  Don't do any
     of these cases if branches are very cheap.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
    return 0;

  /* See what we need to return.  We can only return a 1, -1, or the
     sign bit.  */

  if (normalizep == 0)
    {
      if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
        normalizep = STORE_FLAG_VALUE;

      else if (val_signbit_p (mode, STORE_FLAG_VALUE))
        ;
      else
        return 0;
    }

  last = get_last_insn ();

  /* If optimizing, use different pseudo registers for each insn, instead
     of reusing the same pseudo.  This leads to better CSE, but slows
     down the compiler, since there are more pseudos.  */
  subtarget = (!optimize
               && (target_mode == mode)) ? target : NULL_RTX;
  trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);

  /* For floating-point comparisons, try the reverse comparison or try
     changing the "orderedness" of the comparison.  */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      enum rtx_code first_code;
      bool and_them;

      rcode = reverse_condition_maybe_unordered (code);
      if (can_compare_p (rcode, mode, ccp_store_flag)
          && (code == ORDERED || code == UNORDERED
              || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
              || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
        {
          int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
                          || (STORE_FLAG_VALUE == -1 && normalizep == 1));

          /* For the reverse comparison, use either an addition or a XOR.  */
          if (want_add
              && rtx_cost (GEN_INT (normalizep), PLUS, 1,
                           optimize_insn_for_speed_p ()) == 0)
            {
              tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
                                       STORE_FLAG_VALUE, target_mode);
              if (tem)
                return expand_binop (target_mode, add_optab, tem,
                                     GEN_INT (normalizep),
                                     target, 0, OPTAB_WIDEN);
            }
          else if (!want_add
                   && rtx_cost (trueval, XOR, 1,
                                optimize_insn_for_speed_p ()) == 0)
            {
              tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
                                       normalizep, target_mode);
              if (tem)
                return expand_binop (target_mode, xor_optab, tem, trueval,
                                     target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
            }
        }
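
      /* Both paths flip the reversed result back to the requested
         sense.  E.g. with STORE_FLAG_VALUE == 1 and NORMALIZEP == -1,
         the reversed comparison yields 1/0 and adding -1 maps that to
         0/-1; on the XOR path the reversed comparison yields TRUEVAL/0,
         and XORing with TRUEVAL swaps those two values.  */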

      delete_insns_since (last);

      /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
      if (code == ORDERED || code == UNORDERED)
        return 0;

      and_them = split_comparison (code, mode, &first_code, &code);

      /* If there are no NaNs, the first comparison should always fall through.
         Effectively change the comparison to the other one.  */
      if (!HONOR_NANS (mode))
        {
          gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
          return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
                                    target_mode);
        }

#ifdef HAVE_conditional_move
      /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
         conditional move.  */
      tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
                               normalizep, target_mode);
      if (tem == 0)
        return 0;

      if (and_them)
        tem = emit_conditional_move (target, code, op0, op1, mode,
                                     tem, const0_rtx, GET_MODE (tem), 0);
      else
        tem = emit_conditional_move (target, code, op0, op1, mode,
                                     trueval, tem, GET_MODE (tem), 0);

      if (tem == 0)
        delete_insns_since (last);
      return tem;
#else
      return 0;
#endif
    }

  /* The remaining tricks only apply to integer comparisons.  */

  if (GET_MODE_CLASS (mode) != MODE_INT)
    return 0;

  /* If this is an equality comparison of integers, we can try to exclusive-or
     (or subtract) the two operands and use a recursive call to try the
     comparison with zero.  Don't do any of these cases if branches are
     very cheap.  */

  if ((code == EQ || code == NE) && op1 != const0_rtx)
    {
      tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
                          OPTAB_WIDEN);

      if (tem == 0)
        tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
                            OPTAB_WIDEN);
      if (tem != 0)
        tem = emit_store_flag (target, code, tem, const0_rtx,
                               mode, unsignedp, normalizep);
      if (tem != 0)
        return tem;

      delete_insns_since (last);
    }
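
  /* This relies on the identity that "x == y" holds exactly when
     "(x ^ y) == 0" holds, and likewise when "(x - y) == 0" holds;
     both XOR and subtraction are zero precisely for equal operands.  */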

  /* For integer comparisons, try the reverse comparison.  However, for
     small X, and if we'd have to extend it anyway, implementing "X != 0"
     as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
  rcode = reverse_condition (code);
  if (can_compare_p (rcode, mode, ccp_store_flag)
      && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
            && code == NE
            && GET_MODE_SIZE (mode) < UNITS_PER_WORD
            && op1 == const0_rtx))
    {
      int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
                      || (STORE_FLAG_VALUE == -1 && normalizep == 1));

      /* Again, for the reverse comparison, use either an addition or a XOR.  */
      if (want_add
          && rtx_cost (GEN_INT (normalizep), PLUS, 1,
                       optimize_insn_for_speed_p ()) == 0)
        {
          tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
                                   STORE_FLAG_VALUE, target_mode);
          if (tem != 0)
            tem = expand_binop (target_mode, add_optab, tem,
                                GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
        }
      else if (!want_add
               && rtx_cost (trueval, XOR, 1,
                            optimize_insn_for_speed_p ()) == 0)
        {
          tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
                                   normalizep, target_mode);
          if (tem != 0)
            tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
                                INTVAL (trueval) >= 0, OPTAB_WIDEN);
        }

      if (tem != 0)
        return tem;
      delete_insns_since (last);
    }

  /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
     the constant zero.  Reject all other comparisons at this point.  Only
     do LE and GT if branches are expensive since they are expensive on
     2-operand machines.  */

  if (op1 != const0_rtx
      || (code != EQ && code != NE
          && (BRANCH_COST (optimize_insn_for_speed_p (),
                           false) <= 1 || (code != LE && code != GT))))
    return 0;

  /* Try to put the result of the comparison in the sign bit.  Assume we can't
     do the necessary operation below.  */

  tem = 0;

  /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
     the sign bit set.  */

  if (code == LE)
    {
      /* This is destructive, so SUBTARGET can't be OP0.  */
      if (rtx_equal_p (subtarget, op0))
        subtarget = 0;

      tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
                          OPTAB_WIDEN);
      if (tem)
        tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
                            OPTAB_WIDEN);
    }
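
  /* Illustration in an 8-bit mode:
       A =  0:  A - 1 = 0xff,  A | (A - 1) = 0xff  (sign bit set)
       A =  3:  A - 1 = 0x02,  A | (A - 1) = 0x03  (sign bit clear)
       A = -4:  A - 1 = 0xfb,  A | (A - 1) = 0xff  (sign bit set)
     For negative A the sign bit is already set in A; for A == 0 the
     borrow in A - 1 sets it; for positive A neither operand has it.  */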

  /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
     number of bits in the mode of OP0, minus one.  */

  if (code == GT)
    {
      if (rtx_equal_p (subtarget, op0))
        subtarget = 0;

      tem = expand_shift (RSHIFT_EXPR, mode, op0,
                          GET_MODE_BITSIZE (mode) - 1,
                          subtarget, 0);
      tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
                          OPTAB_WIDEN);
    }
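
  /* The arithmetic shift gives 0 for A >= 0 and -1 for A < 0, so the
     subtraction yields -A when A >= 0 (negative exactly when A > 0)
     and -1 - A when A < 0 (which is >= 0, so the sign bit is clear).
     The result therefore has the sign bit set precisely when A > 0.  */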

  if (code == EQ || code == NE)
    {
      /* For EQ or NE, one way to do the comparison is to apply an operation
         that converts the operand into a positive number if it is nonzero
         or zero if it was originally zero.  Then, for EQ, we subtract 1 and
         for NE we negate.  This puts the result in the sign bit.  Then we
         normalize with a shift, if needed.

         Two operations that can do the above actions are ABS and FFS, so try
         them.  If that doesn't work, and MODE is smaller than a full word,
         we can use zero-extension to the wider mode (an unsigned conversion)
         as the operation.  */

      /* Note that ABS doesn't yield a positive number for INT_MIN, but
         that is compensated by the subsequent overflow when subtracting
         one / negating.  */

      if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
        tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
      else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
        tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
      else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
        {
          tem = convert_modes (word_mode, mode, op0, 1);
          mode = word_mode;
        }

      if (tem != 0)
        {
          if (code == EQ)
            tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
                                0, OPTAB_WIDEN);
          else
            tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
        }

      /* If we couldn't do it that way, for NE we can "or" the two's complement
         of the value with itself.  For EQ, we take the one's complement of
         that "or", which is an extra insn, so we only handle EQ if branches
         are expensive.  */

      if (tem == 0
          && (code == NE
              || BRANCH_COST (optimize_insn_for_speed_p (),
                              false) > 1))
        {
          if (rtx_equal_p (subtarget, op0))
            subtarget = 0;

          tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
          tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
                              OPTAB_WIDEN);

          if (tem && code == EQ)
            tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
        }
    }
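
  /* The "or" trick works because for nonzero A at least one of A and
     -A has the sign bit set (for A == INT_MIN both do), so (-A | A)
     is negative exactly when A != 0.  E.g. in an 8-bit mode, A = 3
     gives 0xfd | 0x03 = 0xff, while A = 0 gives 0x00.  */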

  if (tem && normalizep)
    tem = expand_shift (RSHIFT_EXPR, mode, tem,
                        GET_MODE_BITSIZE (mode) - 1,
                        subtarget, normalizep == 1);

  if (tem)
    {
      if (!target)
        ;
      else if (GET_MODE (tem) != target_mode)
        {
          convert_move (target, tem, 0);
          tem = target;
        }
      else if (!subtarget)
        {
          emit_move_insn (target, tem);
          tem = target;
        }
    }
  else
    delete_insns_since (last);

  return tem;
}

/* Like emit_store_flag, but always succeeds.  */

rtx
emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
                       enum machine_mode mode, int unsignedp, int normalizep)
{
  rtx tem, label;
  rtx trueval, falseval;

  /* First see if emit_store_flag can do the job.  */
  tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
  if (tem != 0)
    return tem;

  if (!target)
    target = gen_reg_rtx (word_mode);

  /* If this failed, we have to do this with set/compare/jump/set code.
     For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
  trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
  if (code == NE
      && GET_MODE_CLASS (mode) == MODE_INT
      && REG_P (target)
      && op0 == target
      && op1 == const0_rtx)
    {
      label = gen_label_rtx ();
      do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
                               mode, NULL_RTX, NULL_RTX, label, -1);
      emit_move_insn (target, trueval);
      emit_label (label);
      return target;
    }
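
  /* The sequence just emitted is, schematically:

         if (op0 == 0) goto label;
         target = trueval;
       label:

     which leaves TARGET (== OP0) at zero when OP0 is zero and stores
     TRUEVAL otherwise, exactly the normalized "op0 != 0" result.  */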

  if (!REG_P (target)
      || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
    target = gen_reg_rtx (GET_MODE (target));

  /* Jump in the right direction if the target cannot implement CODE
     but can jump on its reverse condition.  */
  falseval = const0_rtx;
  if (! can_compare_p (code, mode, ccp_jump)
      && (! FLOAT_MODE_P (mode)
          || code == ORDERED || code == UNORDERED
          || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
          || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
    {
      enum rtx_code rcode;
      if (FLOAT_MODE_P (mode))
        rcode = reverse_condition_maybe_unordered (code);
      else
        rcode = reverse_condition (code);

      /* Canonicalize to UNORDERED for the libcall.  */
      if (can_compare_p (rcode, mode, ccp_jump)
          || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
        {
          falseval = trueval;
          trueval = const0_rtx;
          code = rcode;
        }
    }
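
  /* The general fallback is, schematically:

         target = trueval;
         if (op0 <code> op1) goto label;
         target = falseval;
       label:

     so TARGET ends up as TRUEVAL when the comparison holds and as
     FALSEVAL when it does not.  */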

  emit_move_insn (target, trueval);
  label = gen_label_rtx ();
  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
                           NULL_RTX, label, -1);

  emit_move_insn (target, falseval);
  emit_label (label);

  return target;
}

/* Perform a possibly multi-word comparison and conditionally jump to LABEL
   if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.  This is
   now a thin wrapper around do_compare_rtx_and_jump.  */

static void
do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
                 rtx label)
{
  int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
  do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
                           NULL_RTX, NULL_RTX, label, -1);
}
