/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */


#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "toplev.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "flags.h"
#include "insn-config.h"
#include "expr.h"
#include "optabs.h"
#include "real.h"
#include "recog.h"
#include "langhooks.h"

static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT, rtx);
static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT, rtx);
static rtx extract_fixed_bit_field (enum machine_mode, rtx,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, rtx, int);
static rtx mask_rtx (enum machine_mode, int, int, int);
static rtx lshift_value (enum machine_mode, rtx, int, int);
static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, int);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);

/* Test whether a value is zero or a power of two.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
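/* For example, 0, 1, 2, 4 and 8 all satisfy the test above, while 6
   does not: 6 & 5 is 4, not 0.  */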

/* Nonzero means divides or modulus operations are relatively cheap for
   powers of two, so don't use branches; emit the operation instead.
   Usually, this will mean that the MD file will emit non-branch
   sequences.  */

static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
static bool smod_pow2_cheap[NUM_MACHINE_MODES];

#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
#endif

/* For compilers that support multiple targets with different word sizes,
   MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
   is the H8/300(H) compiler.  */

#ifndef MAX_BITS_PER_WORD
#define MAX_BITS_PER_WORD BITS_PER_WORD
#endif

/* Reduce conditional compilation elsewhere.  */
#ifndef HAVE_insv
#define HAVE_insv       0
#define CODE_FOR_insv   CODE_FOR_nothing
#define gen_insv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extv
#define HAVE_extv       0
#define CODE_FOR_extv   CODE_FOR_nothing
#define gen_extv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extzv
#define HAVE_extzv      0
#define CODE_FOR_extzv  CODE_FOR_nothing
#define gen_extzv(a,b,c,d) NULL_RTX
#endif

/* Cost of various pieces of RTL.  Note that some of these are indexed by
   shift count and some by mode.  */
static int zero_cost;
static int add_cost[NUM_MACHINE_MODES];
static int neg_cost[NUM_MACHINE_MODES];
static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int mul_cost[NUM_MACHINE_MODES];
static int div_cost[NUM_MACHINE_MODES];
static int mul_widen_cost[NUM_MACHINE_MODES];
static int mul_highpart_cost[NUM_MACHINE_MODES];

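/* Fill in the cost tables declared above.  The skeleton RTL
   expressions in ALL below are given each integer mode in turn, so
   that rtx_cost can price an add, shift, multiply or divide in that
   mode without building fresh RTL for every query.  */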
void
init_expmed (void)
{
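  /* Statically laid-out skeleton RTL expressions: the rtunion
     field(s) following each rtx_def supply the operand slots that the
     XEXP stores below write into.  */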
  struct
  {
    struct rtx_def reg;         rtunion reg_fld[2];
    struct rtx_def plus;        rtunion plus_fld1;
    struct rtx_def neg;
    struct rtx_def udiv;        rtunion udiv_fld1;
    struct rtx_def mult;        rtunion mult_fld1;
    struct rtx_def div;         rtunion div_fld1;
    struct rtx_def mod;         rtunion mod_fld1;
    struct rtx_def zext;
    struct rtx_def wide_mult;   rtunion wide_mult_fld1;
    struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
    struct rtx_def wide_trunc;
    struct rtx_def shift;       rtunion shift_fld1;
    struct rtx_def shift_mult;  rtunion shift_mult_fld1;
    struct rtx_def shift_add;   rtunion shift_add_fld1;
    struct rtx_def shift_sub;   rtunion shift_sub_fld1;
  } all;

  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
  int m, n;
  enum machine_mode mode, wider_mode;

  zero_cost = rtx_cost (const0_rtx, 0);

  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
      cint[m] = GEN_INT (m);
    }

  memset (&all, 0, sizeof all);

  PUT_CODE (&all.reg, REG);
  /* Avoid using hard regs in ways which may be unsupported.  */
  REGNO (&all.reg) = LAST_VIRTUAL_REGISTER + 1;

  PUT_CODE (&all.plus, PLUS);
  XEXP (&all.plus, 0) = &all.reg;
  XEXP (&all.plus, 1) = &all.reg;

  PUT_CODE (&all.neg, NEG);
  XEXP (&all.neg, 0) = &all.reg;

  PUT_CODE (&all.udiv, UDIV);
  XEXP (&all.udiv, 0) = &all.reg;
  XEXP (&all.udiv, 1) = &all.reg;

  PUT_CODE (&all.mult, MULT);
  XEXP (&all.mult, 0) = &all.reg;
  XEXP (&all.mult, 1) = &all.reg;

  PUT_CODE (&all.div, DIV);
  XEXP (&all.div, 0) = &all.reg;
  XEXP (&all.div, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);

  PUT_CODE (&all.mod, MOD);
  XEXP (&all.mod, 0) = &all.reg;
  XEXP (&all.mod, 1) = XEXP (&all.div, 1);

  PUT_CODE (&all.zext, ZERO_EXTEND);
  XEXP (&all.zext, 0) = &all.reg;

  PUT_CODE (&all.wide_mult, MULT);
  XEXP (&all.wide_mult, 0) = &all.zext;
  XEXP (&all.wide_mult, 1) = &all.zext;

  PUT_CODE (&all.wide_lshr, LSHIFTRT);
  XEXP (&all.wide_lshr, 0) = &all.wide_mult;

  PUT_CODE (&all.wide_trunc, TRUNCATE);
  XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
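  /* Taken together, ZEXT, WIDE_MULT, WIDE_LSHR and WIDE_TRUNC model a
     highpart multiply: widen both operands, multiply, shift the
     double-width product right by the narrow bitsize, and truncate
     back to the narrow mode.  The loop below records its cost as
     mul_highpart_cost.  */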

  PUT_CODE (&all.shift, ASHIFT);
  XEXP (&all.shift, 0) = &all.reg;

  PUT_CODE (&all.shift_mult, MULT);
  XEXP (&all.shift_mult, 0) = &all.reg;

  PUT_CODE (&all.shift_add, PLUS);
  XEXP (&all.shift_add, 0) = &all.shift_mult;
  XEXP (&all.shift_add, 1) = &all.reg;

  PUT_CODE (&all.shift_sub, MINUS);
  XEXP (&all.shift_sub, 0) = &all.shift_mult;
  XEXP (&all.shift_sub, 1) = &all.reg;

  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
       mode != VOIDmode;
       mode = GET_MODE_WIDER_MODE (mode))
    {
      PUT_MODE (&all.reg, mode);
      PUT_MODE (&all.plus, mode);
      PUT_MODE (&all.neg, mode);
      PUT_MODE (&all.udiv, mode);
      PUT_MODE (&all.mult, mode);
      PUT_MODE (&all.div, mode);
      PUT_MODE (&all.mod, mode);
      PUT_MODE (&all.wide_trunc, mode);
      PUT_MODE (&all.shift, mode);
      PUT_MODE (&all.shift_mult, mode);
      PUT_MODE (&all.shift_add, mode);
      PUT_MODE (&all.shift_sub, mode);

      add_cost[mode] = rtx_cost (&all.plus, SET);
      neg_cost[mode] = rtx_cost (&all.neg, SET);
      div_cost[mode] = rtx_cost (&all.udiv, SET);
      mul_cost[mode] = rtx_cost (&all.mult, SET);

      sdiv_pow2_cheap[mode] = (rtx_cost (&all.div, SET) <= 2 * add_cost[mode]);
      smod_pow2_cheap[mode] = (rtx_cost (&all.mod, SET) <= 4 * add_cost[mode]);

      wider_mode = GET_MODE_WIDER_MODE (mode);
      if (wider_mode != VOIDmode)
        {
          PUT_MODE (&all.zext, wider_mode);
          PUT_MODE (&all.wide_mult, wider_mode);
          PUT_MODE (&all.wide_lshr, wider_mode);
          XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));

          mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
          mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
        }

      shift_cost[mode][0] = 0;
      shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];

      n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
      for (m = 1; m < n; m++)
        {
          XEXP (&all.shift, 1) = cint[m];
          XEXP (&all.shift_mult, 1) = pow2[m];

          shift_cost[mode][m] = rtx_cost (&all.shift, SET);
          shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
          shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
        }
    }
}

/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (enum machine_mode mode, rtx x)
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}

/* Report on the availability of insv/extv/extzv and the desired mode
   of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
   is false; else the mode of the specified operand.  If OPNO is -1,
   all the caller cares about is whether the insn is available.  */
enum machine_mode
mode_for_extraction (enum extraction_pattern pattern, int opno)
{
  const struct insn_data *data;

  switch (pattern)
    {
    case EP_insv:
      if (HAVE_insv)
        {
          data = &insn_data[CODE_FOR_insv];
          break;
        }
      return MAX_MACHINE_MODE;

    case EP_extv:
      if (HAVE_extv)
        {
          data = &insn_data[CODE_FOR_extv];
          break;
        }
      return MAX_MACHINE_MODE;

    case EP_extzv:
      if (HAVE_extzv)
        {
          data = &insn_data[CODE_FOR_extzv];
          break;
        }
      return MAX_MACHINE_MODE;

    default:
      gcc_unreachable ();
    }

  if (opno == -1)
    return VOIDmode;

  /* Everyone who uses this function used to follow it with
     if (result == VOIDmode) result = word_mode; */
  if (data->operand[opno].mode == VOIDmode)
    return word_mode;
  return data->operand[opno].mode;
}


/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.
   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */

/* ??? Note that there are two different ideas here for how
   to determine the size to count bits within, for a register.
   One is BITS_PER_WORD, and the other is the size of operand 3
   of the insv pattern.

   If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD;
   else, we use the mode of operand 3.  */

rtx
store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
                 rtx value)
{
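  /* UNIT is the granularity that BITNUM is measured in: bits per byte
     for a memory reference, bits per word for a register.  */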
  unsigned int unit
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset, bitpos;
  rtx op0 = str_rtx;
  int byte_offset;
  rtx orig_value;

  enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);

  while (GET_CODE (op0) == SUBREG)
    {
      /* The following line once was done only if WORDS_BIG_ENDIAN,
         but I think that is a mistake.  WORDS_BIG_ENDIAN is
         meaningful at a much higher level; when structures are copied
         between memory and regs, the higher-numbered regs
         always get higher addresses.  */
      int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
      int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));

      byte_offset = 0;

      /* Paradoxical subregs need special handling on big endian machines.  */
      if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
        {
          int difference = inner_mode_size - outer_mode_size;

          if (WORDS_BIG_ENDIAN)
            byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
          if (BYTES_BIG_ENDIAN)
            byte_offset += difference % UNITS_PER_WORD;
        }
      else
        byte_offset = SUBREG_BYTE (op0);

      bitnum += byte_offset * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* No action is needed if the target is a register and if the field
     lies completely outside that register.  This can occur if the source
     code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return value;

  /* Use vec_set patterns for inserting parts of vectors whenever
     available.  */
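  /* The insertion must overwrite exactly one vector element: FIELDMODE
     must match the element mode, and BITNUM must be a multiple of the
     element width.  */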
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
          != CODE_FOR_nothing)
      && fieldmode == GET_MODE_INNER (GET_MODE (op0))
      && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
      && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      int icode = (int) vec_set_optab->handlers[outermode].insn_code;
      int pos = bitnum / GET_MODE_BITSIZE (innermode);
      rtx rtxpos = GEN_INT (pos);
      rtx src = value;
      rtx dest = op0;
      rtx pat, seq;
      enum machine_mode mode0 = insn_data[icode].operand[0].mode;
      enum machine_mode mode1 = insn_data[icode].operand[1].mode;
      enum machine_mode mode2 = insn_data[icode].operand[2].mode;

      start_sequence ();

      if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
        src = copy_to_mode_reg (mode1, src);

      if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
        rtxpos = copy_to_mode_reg (mode2, rtxpos);

      /* We could handle this, but we should always be called with a pseudo
         for our targets and all insns should take them as outputs.  */
      gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
                  && (*insn_data[icode].operand[1].predicate) (src, mode1)
                  && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
      pat = GEN_FCN (icode) (dest, src, rtxpos);
      seq = get_insns ();
      end_sequence ();
      if (pat)
        {
          emit_insn (seq);
          emit_insn (pat);
          return dest;
        }
    }

  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.

     If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */

  offset = bitnum / unit;
  bitpos = bitnum % unit;
  byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
                + (offset * UNITS_PER_WORD);

  if (bitpos == 0
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (!MEM_P (op0)
          ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
             || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
             && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
          : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
             || (offset * BITS_PER_UNIT % bitsize == 0
                 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
    {
      if (MEM_P (op0))
        op0 = adjust_address (op0, fieldmode, offset);
      else if (GET_MODE (op0) != fieldmode)
        op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
                                   byte_offset);
      emit_move_insn (op0, value);
      return value;
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
        if (MEM_P (op0))
          op0 = adjust_address (op0, imode, 0);
        else
          {
            gcc_assert (imode != BLKmode);
            op0 = gen_lowpart (imode, op0);
          }
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  if (MEM_P (op0))
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && !MEM_P (op0)
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* Storing an lsb-aligned field in a register
     can be done with a movestrict instruction.  */

  if (!MEM_P (op0)
      && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (movstrict_optab->handlers[fieldmode].insn_code
          != CODE_FOR_nothing))
    {
      int icode = movstrict_optab->handlers[fieldmode].insn_code;

      /* Get appropriate low part of the value being stored.  */
      if (GET_CODE (value) == CONST_INT || REG_P (value))
        value = gen_lowpart (fieldmode, value);
      else if (!(GET_CODE (value) == SYMBOL_REF
                 || GET_CODE (value) == LABEL_REF
                 || GET_CODE (value) == CONST))
        value = convert_to_mode (fieldmode, value, 0);

      if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
        value = copy_to_mode_reg (fieldmode, value);

      if (GET_CODE (op0) == SUBREG)
        {
          /* Else we've got some float mode source being extracted into
             a different float mode destination -- this combination of
             subregs results in Severe Tire Damage.  */
          gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
                      || GET_MODE_CLASS (fieldmode) == MODE_INT
                      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
          op0 = SUBREG_REG (op0);
        }

      emit_insn (GEN_FCN (icode)
                 (gen_rtx_SUBREG (fieldmode, op0,
                                  (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
                                  + (offset * UNITS_PER_WORD)),
                                  value));

      return value;
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
         in the order least significant first.
         This is because the most significant word is the one which may
         be less than full.
         However, only do that if the value is not BLKmode.  */

      unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;

      /* This is the mode we must force value to, so that there will be enough
         subwords to extract.  Note that fieldmode will often (always?) be
         VOIDmode, because that is what store_field uses to indicate that this
         is a bit field, but passing VOIDmode to operand_subword_force
         is not allowed.  */
      fieldmode = GET_MODE (value);
      if (fieldmode == VOIDmode)
        fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);

      for (i = 0; i < nwords; i++)
        {
          /* If I is 0, use the low-order word in both field and target;
             if I is 1, use the next to lowest word; and so on.  */
          unsigned int wordnum = (backwards ? nwords - i - 1 : i);
          unsigned int bit_offset = (backwards
                                     ? MAX ((int) bitsize - ((int) i + 1)
                                            * BITS_PER_WORD,
                                            0)
                                     : (int) i * BITS_PER_WORD);

          store_bit_field (op0, MIN (BITS_PER_WORD,
                                     bitsize - i * BITS_PER_WORD),
                           bitnum + bit_offset, word_mode,
                           operand_subword_force (value, wordnum, fieldmode));
        }
      return value;
    }

  /* From here on we can assume that the field to be stored in is
     a full-word (whatever type that is), since it is shorter than a word.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (!MEM_P (op0))
    {
      if (offset != 0
          || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
        {
          if (!REG_P (op0))
            {
              /* Since this is a destination (lvalue), we can't copy
                 it to a pseudo.  We can remove a SUBREG that does not
                 change the size of the operand.  Such a SUBREG may
                 have been added above.  */
              gcc_assert (GET_CODE (op0) == SUBREG
                          && (GET_MODE_SIZE (GET_MODE (op0))
                              == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
              op0 = SUBREG_REG (op0);
            }
          op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
                                op0, (offset * UNITS_PER_WORD));
        }
      offset = 0;
    }

  /* If VALUE has a floating-point or complex mode, access it as an
     integer of the corresponding size.  This can occur on a machine
     with 64 bit registers that uses SFmode for float.  It can also
     occur for unaligned float or complex fields.  */
  orig_value = value;
  if (GET_MODE (value) != VOIDmode
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
    {
      value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
      emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
    }

  /* Now OFFSET is nonzero only if OP0 is memory
     and is therefore always measured in bytes.  */

  if (HAVE_insv
      && GET_MODE (value) != BLKmode
      && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
      && bitsize > 0
      && GET_MODE_BITSIZE (op_mode) >= bitsize
      && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
            && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
      && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
                                                        VOIDmode))
    {
      int xbitpos = bitpos;
      rtx value1;
      rtx xop0 = op0;
      rtx last = get_last_insn ();
      rtx pat;
      enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
      int save_volatile_ok = volatile_ok;

      volatile_ok = 1;

      /* If this machine's insv can only insert into a register, copy OP0
         into a register and save it back later.  */
      if (MEM_P (op0)
          && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
                (op0, VOIDmode)))
        {
          rtx tempreg;
          enum machine_mode bestmode;

          /* Get the mode to use for inserting into this field.  If OP0 is
             BLKmode, get the smallest mode consistent with the alignment. If
             OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
             mode. Otherwise, use the smallest mode containing the field.  */

          if (GET_MODE (op0) == BLKmode
              || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
            bestmode
              = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
                               MEM_VOLATILE_P (op0));
          else
            bestmode = GET_MODE (op0);

          if (bestmode == VOIDmode
              || GET_MODE_SIZE (bestmode) < GET_MODE_SIZE (fieldmode)
              || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
                  && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
            goto insv_loses;

          /* Adjust address to point to the containing unit of that mode.
             Compute offset as multiple of this unit, counting in bytes.  */
          unit = GET_MODE_BITSIZE (bestmode);
          offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
          bitpos = bitnum % unit;
          op0 = adjust_address (op0, bestmode, offset);

          /* Fetch that unit, store the bitfield in it, then store
             the unit.  */
          tempreg = copy_to_reg (op0);
          store_bit_field (tempreg, bitsize, bitpos, fieldmode, orig_value);
          emit_move_insn (op0, tempreg);
          return value;
        }
      volatile_ok = save_volatile_ok;

      /* Add OFFSET into OP0's address.  */
      if (MEM_P (xop0))
        xop0 = adjust_address (xop0, byte_mode, offset);

      /* If xop0 is a register, we need it in MAXMODE
         to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
        /* We can't just change the mode, because this might clobber op0,
           and we will need the original value of op0 if insv fails.  */
        xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
        xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);

      /* On big-endian machines, we count bits from the most significant.
         If the bit field insn does not, we must invert.  */

      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
        xbitpos = unit - bitsize - xbitpos;

      /* We have been counting XBITPOS within UNIT.
         Count instead within the size of the register.  */
      if (BITS_BIG_ENDIAN && !MEM_P (xop0))
        xbitpos += GET_MODE_BITSIZE (maxmode) - unit;

      unit = GET_MODE_BITSIZE (maxmode);

      /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
      value1 = value;
      if (GET_MODE (value) != maxmode)
        {
          if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
            {
              /* Optimization: Don't bother really extending VALUE
                 if it has all the bits we will actually use.  However,
                 if we must narrow it, be sure we do it correctly.  */

              if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
                {
                  rtx tmp;

                  tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
                  if (! tmp)
                    tmp = simplify_gen_subreg (maxmode,
                                               force_reg (GET_MODE (value),
                                                          value1),
                                               GET_MODE (value), 0);
                  value1 = tmp;
                }
              else
                value1 = gen_lowpart (maxmode, value1);
            }
          else if (GET_CODE (value) == CONST_INT)
            value1 = gen_int_mode (INTVAL (value), maxmode);
          else
            /* Parse phase is supposed to make VALUE's data type
               match that of the component reference, which is a type
               at least as wide as the field; so VALUE should have
               a mode that corresponds to that type.  */
            gcc_assert (CONSTANT_P (value));
        }

      /* If this machine's insv insists on a register,
         get VALUE1 into a register.  */
      if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
             (value1, maxmode)))
        value1 = force_reg (maxmode, value1);

      pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
      if (pat)
        emit_insn (pat);
      else
        {
          delete_insns_since (last);
          store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
        }
    }
  else
    insv_loses:
    /* Insv is not available; store using shifts and boolean ops.  */
    store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
  return value;
}

/* Use shifts and boolean operations to store VALUE
   into a bit field of width BITSIZE
   in a memory location specified by OP0 except offset by OFFSET bytes.
     (OFFSET must be 0 if OP0 is a register.)
   The field starts at position BITPOS within the byte.
    (If OP0 is a register, it may be a full word or a narrower mode,
     but BITPOS still counts within a full word,
     which is significant on bigendian machines.)  */

static void
store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
                       unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitpos, rtx value)
{
  enum machine_mode mode;
  unsigned int total_bits = BITS_PER_WORD;
  rtx temp;
  int all_zero = 0;
  int all_one = 0;

  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    {
      gcc_assert (!offset);
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
        {
          store_split_bit_field (op0, bitsize, bitpos, value);
          return;
        }
    }
  else
    {
      /* Get the proper mode to use for this field.  We want a mode that
         includes the entire field.  If such a mode would be larger than
         a word, we won't be doing the extraction the normal way.
         We don't want a mode bigger than the destination.  */

      mode = GET_MODE (op0);
      if (GET_MODE_BITSIZE (mode) == 0
          || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
        mode = word_mode;
      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
                            MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
        {
          /* The only way this should occur is if the field spans word
             boundaries.  */
          store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
                                 value);
          return;
        }

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
         be in the range 0 to total_bits-1, and put any excess bytes in
         OFFSET.  */
      if (bitpos >= total_bits)
        {
          offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
          bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
                     * BITS_PER_UNIT);
        }

      /* Get ref to an aligned byte, halfword, or word containing the field.
         Adjust BITPOS to be position within a word,
         and OFFSET to be the offset of that word.
         Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = adjust_address (op0, mode, offset);
    }

  mode = GET_MODE (op0);

  /* Now MODE is either some integral mode for a MEM as OP0,
     or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
     The bit field is contained entirely within OP0.
     BITPOS is the starting bit number within OP0.
     (OP0's mode may actually be narrower than MODE.)  */

  if (BYTES_BIG_ENDIAN)
      /* BITPOS is the distance between our msb
         and that of the containing datum.
         Convert it to the distance from the lsb.  */
      bitpos = total_bits - bitsize - bitpos;

  /* Now BITPOS is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (GET_CODE (value) == CONST_INT)
    {
      HOST_WIDE_INT v = INTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
        v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;

      if (v == 0)
        all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
                && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
               || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
        all_one = 1;

      value = lshift_value (mode, value, bitpos, bitsize);
    }
  else
    {
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
                      && bitpos + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
        {
          if ((REG_P (value) || GET_CODE (value) == SUBREG)
              && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
            value = gen_lowpart (mode, value);
          else
            value = convert_to_mode (mode, value, 1);
        }

      if (must_and)
        value = expand_binop (mode, and_optab, value,
                              mask_rtx (mode, 0, bitsize, 0),
                              NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitpos > 0)
        value = expand_shift (LSHIFT_EXPR, mode, value,
                              build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
    }

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */

  temp = force_reg (mode, op0);

  if (! all_one)
    {
      temp = expand_binop (mode, and_optab, temp,
                           mask_rtx (mode, bitpos, bitsize, 1),
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    {
      temp = expand_binop (mode, ior_optab, temp, value,
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  if (op0 != temp)
    emit_move_insn (op0, temp);
}

/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store.

   This does not yet handle fields wider than BITS_PER_WORD.  */

static void
store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitpos, rtx value)
{
  unsigned int unit;
  unsigned int bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
        value = word;
      else
        value = gen_lowpart_common (word_mode,
                                    force_reg (GET_MODE (value) != VOIDmode
                                               ? GET_MODE (value)
                                               : word_mode, value));
    }

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
         store_fixed_bit_field will call us again, and we will mutually
         recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (BYTES_BIG_ENDIAN)
        {
          int total_bits;

          /* We must do an endian conversion exactly the same way as it is
             done in extract_bit_field, so that the two calls to
             extract_fixed_bit_field will have comparable arguments.  */
          if (!MEM_P (value) || GET_MODE (value) == BLKmode)
            total_bits = BITS_PER_WORD;
          else
            total_bits = GET_MODE_BITSIZE (GET_MODE (value));

          /* Fetch successively less significant portions.  */
          if (GET_CODE (value) == CONST_INT)
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                             >> (bitsize - bitsdone - thissize))
                            & (((HOST_WIDE_INT) 1 << thissize) - 1));
          else
            /* The args are chosen so that the last part includes the
               lsb.  Give extract_bit_field the value it needs (with
               endianness compensation) to fetch the piece we want.  */
            part = extract_fixed_bit_field (word_mode, value, 0, thissize,
                                            total_bits - bitsize + bitsdone,
                                            NULL_RTX, 1);
        }
      else
        {
          /* Fetch successively more significant portions.  */
          if (GET_CODE (value) == CONST_INT)
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                             >> bitsdone)
                            & (((HOST_WIDE_INT) 1 << thissize) - 1));
          else
            part = extract_fixed_bit_field (word_mode, value, 0, thissize,
                                            bitsdone, NULL_RTX, 1);
        }

      /* If OP0 is a register, then handle OFFSET here.

         When handling multiword bitfields, extract_bit_field may pass
         down a word_mode SUBREG of a larger REG for a bitfield that actually
         crosses a word boundary.  Thus, for a SUBREG, we must find
         the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
        {
          int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
          word = operand_subword_force (SUBREG_REG (op0), word_offset,
                                        GET_MODE (SUBREG_REG (op0)));
          offset = 0;
        }
      else if (REG_P (op0))
        {
          word = operand_subword_force (op0, offset, GET_MODE (op0));
          offset = 0;
        }
      else
        word = op0;

      /* OFFSET is in UNITs, and UNIT is in bits.
         store_fixed_bit_field wants offset in bytes.  */
      store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
                             thispos, part);
      bitsdone += thissize;
    }
}

/* Generate code to extract a bit-field from STR_RTX
   containing BITSIZE bits, starting at BITNUM,
   and put it in TARGET if possible (if TARGET is nonzero).
   Regardless of TARGET, we return the rtx for where the value is placed.

   STR_RTX is the structure containing the bit-field (a REG or MEM).
   UNSIGNEDP is nonzero if this is an unsigned bit field.
   MODE is the natural mode of the field value once extracted.
   TMODE is the mode the caller would like the value to have;
   but the value may be returned with type MODE instead.

   If a TARGET is specified and we can store in it at no extra cost,
   we do so, and return TARGET.
   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
   if they are equally easy.  */
1078
 
1079
rtx
1080
extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1081
                   unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1082
                   enum machine_mode mode, enum machine_mode tmode)
1083
{
1084
  unsigned int unit
1085
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1086
  unsigned HOST_WIDE_INT offset, bitpos;
1087
  rtx op0 = str_rtx;
1088
  rtx spec_target = target;
1089
  rtx spec_target_subreg = 0;
1090
  enum machine_mode int_mode;
1091
  enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1092
  enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1093
  enum machine_mode mode1;
1094
  int byte_offset;
1095
 
1096
  if (tmode == VOIDmode)
1097
    tmode = mode;
1098
 
1099
  while (GET_CODE (op0) == SUBREG)
1100
    {
1101
      bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1102
      op0 = SUBREG_REG (op0);
1103
    }
1104
 
1105
  /* If we have an out-of-bounds access to a register, just return an
1106
     uninitialized register of the required mode.  This can occur if the
1107
     source code contains an out-of-bounds access to a small array.  */
1108
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1109
    return gen_reg_rtx (tmode);
1110
 
1111
  if (REG_P (op0)
1112
      && mode == GET_MODE (op0)
1113
      && bitnum == 0
1114
      && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1115
    {
1116
      /* We're trying to extract a full register from itself.  */
1117
      return op0;
1118
    }
1119
 
1120
  /* Use vec_extract patterns for extracting parts of vectors whenever
1121
     available.  */
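  /* The first and last bits of the field must lie within the same
     vector element; an extraction that straddles two elements cannot
     use vec_extract.  */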
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
          != CODE_FOR_nothing)
      && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
          == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
      unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
      rtx rtxpos = GEN_INT (pos);
      rtx src = op0;
      rtx dest = NULL, pat, seq;
      enum machine_mode mode0 = insn_data[icode].operand[0].mode;
      enum machine_mode mode1 = insn_data[icode].operand[1].mode;
      enum machine_mode mode2 = insn_data[icode].operand[2].mode;

      if (innermode == tmode || innermode == mode)
        dest = target;

      if (!dest)
        dest = gen_reg_rtx (innermode);

      start_sequence ();

      if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
        dest = copy_to_mode_reg (mode0, dest);

      if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
        src = copy_to_mode_reg (mode1, src);

      if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
        rtxpos = copy_to_mode_reg (mode2, rtxpos);

      /* We could handle this, but we should always be called with a pseudo
         for our targets and all insns should take them as outputs.  */
      gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
                  && (*insn_data[icode].operand[1].predicate) (src, mode1)
                  && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));

      pat = GEN_FCN (icode) (dest, src, rtxpos);
      seq = get_insns ();
      end_sequence ();
      if (pat)
        {
          emit_insn (seq);
          emit_insn (pat);
          return dest;
        }
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
        if (MEM_P (op0))
          op0 = adjust_address (op0, imode, 0);
        else
          {
            gcc_assert (imode != BLKmode);
            op0 = gen_lowpart (imode, op0);

            /* If we got a SUBREG, force it into a register since we
               aren't going to be able to do another SUBREG on it.  */
            if (GET_CODE (op0) == SUBREG)
              op0 = force_reg (imode, op0);
          }
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  if (MEM_P (op0))
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* Extraction of a full-word or multi-word value from a structure
     in a register or aligned memory can be done with just a SUBREG.
     A subword value in the least significant part of a register
     can also be extracted with a SUBREG.  For this, we need the
     byte offset of the value in op0.  */

  bitpos = bitnum % unit;
  offset = bitnum / unit;
  byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && !MEM_P (op0)
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* ??? We currently assume TARGET is at least as big as BITSIZE.
     If that's wrong, the solution is to test for it and set TARGET to 0
     if needed.  */

  /* Only scalar integer modes can be converted via subregs.  There is an
     additional problem for FP modes here in that they can have a precision
     which is different from the size.  mode_for_size uses precision, but
     we want a mode based on the size, so we must avoid calling it for FP
     modes.  */
  mode1 = (SCALAR_INT_MODE_P (tmode)
           ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
           : mode);

  if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
        && bitpos % BITS_PER_WORD == 0)
       || (mode1 != BLKmode
           /* ??? The big endian test here is wrong.  This is correct
              if the value is in a register, and if mode_for_size is not
              the same mode as op0.  This causes us to get unnecessarily
              inefficient code from the Thumb port when -mbig-endian.  */
           && (BYTES_BIG_ENDIAN
               ? bitpos + bitsize == BITS_PER_WORD
               : bitpos == 0)))
      && ((!MEM_P (op0)
           && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
                                     GET_MODE_BITSIZE (GET_MODE (op0)))
           && GET_MODE_SIZE (mode1) != 0
           && byte_offset % GET_MODE_SIZE (mode1) == 0)
          || (MEM_P (op0)
              && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
                  || (offset * BITS_PER_UNIT % bitsize == 0
                      && MEM_ALIGN (op0) % bitsize == 0)))))
    {
      if (mode1 != GET_MODE (op0))
        {
          if (MEM_P (op0))
            op0 = adjust_address (op0, mode1, offset);
          else
            {
              rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
                                             byte_offset);
              if (sub == NULL)
                goto no_subreg_mode_swap;
              op0 = sub;
            }
        }
      if (mode1 != mode)
        return convert_to_mode (tmode, op0, unsignedp);
      return op0;
    }
 no_subreg_mode_swap:

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
         in the order least significant first.
         This is because the most significant word is the one which may
         be less than full.  */

      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;

      if (target == 0 || !REG_P (target))
        target = gen_reg_rtx (mode);

      /* Indicate for flow that the entire target reg is being set.  */
      emit_insn (gen_rtx_CLOBBER (VOIDmode, target));

      for (i = 0; i < nwords; i++)
        {
          /* If I is 0, use the low-order word in both field and target;
             if I is 1, use the next to lowest word; and so on.  */
          /* Word number in TARGET to use.  */
          unsigned int wordnum
            = (WORDS_BIG_ENDIAN
               ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
               : i);
          /* Offset from start of field in OP0.  */
          unsigned int bit_offset = (WORDS_BIG_ENDIAN
                                     ? MAX (0, ((int) bitsize - ((int) i + 1)
                                                * (int) BITS_PER_WORD))
                                     : (int) i * BITS_PER_WORD);
          rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
          rtx result_part
            = extract_bit_field (op0, MIN (BITS_PER_WORD,
                                           bitsize - i * BITS_PER_WORD),
                                 bitnum + bit_offset, 1, target_part, mode,
                                 word_mode);

          gcc_assert (target_part);

          if (result_part != target_part)
            emit_move_insn (target_part, result_part);
        }

      if (unsignedp)
        {
          /* Unless we've filled TARGET, the upper regs in a multi-reg value
             need to be zero'd out.  */
          if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
            {
              unsigned int i, total_words;

              total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
              for (i = nwords; i < total_words; i++)
                emit_move_insn
                  (operand_subword (target,
                                    WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
                                    1, VOIDmode),
                   const0_rtx);
            }
          return target;
        }

      /* Signed bit field: sign-extend with two arithmetic shifts.  */
      target = expand_shift (LSHIFT_EXPR, mode, target,
                             build_int_cst (NULL_TREE,
                                            GET_MODE_BITSIZE (mode) - bitsize),
                             NULL_RTX, 0);
      return expand_shift (RSHIFT_EXPR, mode, target,
                           build_int_cst (NULL_TREE,
                                          GET_MODE_BITSIZE (mode) - bitsize),
                           NULL_RTX, 0);
    }

  /* From here on we know the desired field is smaller than a word.  */

  /* Check if there is a correspondingly-sized integer field, so we can
     safely extract it as one size of integer, if necessary; then
     truncate or extend to the size that is wanted; then use SUBREGs or
     convert_to_mode to get one of the modes we really wanted.  */

  int_mode = int_mode_for_mode (tmode);
  if (int_mode == BLKmode)
    int_mode = int_mode_for_mode (mode);
  /* Should probably push op0 out to memory and then do a load.  */
  gcc_assert (int_mode != BLKmode);

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */
  if (!MEM_P (op0))
    {
      if (offset != 0
          || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
        {
          if (!REG_P (op0))
            op0 = copy_to_reg (op0);
          op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
                                op0, (offset * UNITS_PER_WORD));
        }
      offset = 0;
    }

  /* Now OFFSET is nonzero only for memory operands.  */

  if (unsignedp)
    {
      if (HAVE_extzv
          && bitsize > 0
          && GET_MODE_BITSIZE (extzv_mode) >= bitsize
          && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
                && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
        {
          unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
          rtx bitsize_rtx, bitpos_rtx;
          rtx last = get_last_insn ();
          rtx xop0 = op0;
          rtx xtarget = target;
          rtx xspec_target = spec_target;
          rtx xspec_target_subreg = spec_target_subreg;
          rtx pat;
          enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);

          if (MEM_P (xop0))
            {
              int save_volatile_ok = volatile_ok;
              volatile_ok = 1;

              /* Is the memory operand acceptable?  */
              if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
                     (xop0, GET_MODE (xop0))))
                {
                  /* No, load into a reg and extract from there.  */
                  enum machine_mode bestmode;

                  /* Get the mode to use for inserting into this field.  If
                     OP0 is BLKmode, get the smallest mode consistent with the
                     alignment. If OP0 is a non-BLKmode object that is no
                     wider than MAXMODE, use its mode. Otherwise, use the
                     smallest mode containing the field.  */

                  if (GET_MODE (xop0) == BLKmode
                      || (GET_MODE_SIZE (GET_MODE (op0))
                          > GET_MODE_SIZE (maxmode)))
                    bestmode = get_best_mode (bitsize, bitnum,
                                              MEM_ALIGN (xop0), maxmode,
                                              MEM_VOLATILE_P (xop0));
                  else
                    bestmode = GET_MODE (xop0);

                  if (bestmode == VOIDmode
                      || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
                          && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
                    goto extzv_loses;

                  /* Compute offset as multiple of this unit,
                     counting in bytes.  */
                  unit = GET_MODE_BITSIZE (bestmode);
                  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
                  xbitpos = bitnum % unit;
                  xop0 = adjust_address (xop0, bestmode, xoffset);

                  /* Make sure register is big enough for the whole field. */
                  if (xoffset * BITS_PER_UNIT + unit
                      < offset * BITS_PER_UNIT + bitsize)
                    goto extzv_loses;

                  /* Fetch it to a register in that size.  */
                  xop0 = force_reg (bestmode, xop0);

                  /* XBITPOS counts within UNIT, which is what is expected.  */
1445
                }
1446
              else
1447
                /* Get ref to first byte containing part of the field.  */
1448
                xop0 = adjust_address (xop0, byte_mode, xoffset);
1449
 
1450
              volatile_ok = save_volatile_ok;
1451
            }
1452
 
1453
          /* If op0 is a register, we need it in MAXMODE (which is usually
1454
             SImode). to make it acceptable to the format of extzv.  */
1455
          if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1456
            goto extzv_loses;
1457
          if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1458
            xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1459
 
1460
          /* On big-endian machines, we count bits from the most significant.
1461
             If the bit field insn does not, we must invert.  */
1462
          if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1463
            xbitpos = unit - bitsize - xbitpos;
1464
 
1465
          /* Now convert from counting within UNIT to counting in MAXMODE.  */
1466
          if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1467
            xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1468
 
1469
          unit = GET_MODE_BITSIZE (maxmode);
1470
 
1471
          if (xtarget == 0)
1472
            xtarget = xspec_target = gen_reg_rtx (tmode);
1473
 
1474
          if (GET_MODE (xtarget) != maxmode)
1475
            {
1476
              if (REG_P (xtarget))
1477
                {
1478
                  int wider = (GET_MODE_SIZE (maxmode)
1479
                               > GET_MODE_SIZE (GET_MODE (xtarget)));
1480
                  xtarget = gen_lowpart (maxmode, xtarget);
1481
                  if (wider)
1482
                    xspec_target_subreg = xtarget;
1483
                }
1484
              else
1485
                xtarget = gen_reg_rtx (maxmode);
1486
            }
1487
 
1488
          /* If this machine's extzv insists on a register target,
1489
             make sure we have one.  */
1490
          if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1491
                 (xtarget, maxmode)))
1492
            xtarget = gen_reg_rtx (maxmode);
1493
 
1494
          bitsize_rtx = GEN_INT (bitsize);
1495
          bitpos_rtx = GEN_INT (xbitpos);
1496
 
1497
          pat = gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1498
          if (pat)
1499
            {
1500
              emit_insn (pat);
1501
              target = xtarget;
1502
              spec_target = xspec_target;
1503
              spec_target_subreg = xspec_target_subreg;
1504
            }
1505
          else
1506
            {
1507
              delete_insns_since (last);
1508
              target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1509
                                                bitpos, target, 1);
1510
            }
1511
        }
1512
      else
1513
      extzv_loses:
1514
        target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1515
                                          bitpos, target, 1);
1516
    }
1517
  else
1518
    {
1519
      if (HAVE_extv
1520
          && bitsize > 0
1521
          && GET_MODE_BITSIZE (extv_mode) >= bitsize
1522
          && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1523
                && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1524
        {
1525
          int xbitpos = bitpos, xoffset = offset;
1526
          rtx bitsize_rtx, bitpos_rtx;
1527
          rtx last = get_last_insn ();
1528
          rtx xop0 = op0, xtarget = target;
1529
          rtx xspec_target = spec_target;
1530
          rtx xspec_target_subreg = spec_target_subreg;
1531
          rtx pat;
1532
          enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1533
 
1534
          if (MEM_P (xop0))
1535
            {
1536
              /* Is the memory operand acceptable?  */
1537
              if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1538
                     (xop0, GET_MODE (xop0))))
1539
                {
1540
                  /* No, load into a reg and extract from there.  */
1541
                  enum machine_mode bestmode;
1542
 
1543
                  /* Get the mode to use for inserting into this field.  If
1544
                     OP0 is BLKmode, get the smallest mode consistent with the
1545
                     alignment. If OP0 is a non-BLKmode object that is no
1546
                     wider than MAXMODE, use its mode. Otherwise, use the
1547
                     smallest mode containing the field.  */
1548
 
1549
                  if (GET_MODE (xop0) == BLKmode
1550
                      || (GET_MODE_SIZE (GET_MODE (op0))
1551
                          > GET_MODE_SIZE (maxmode)))
1552
                    bestmode = get_best_mode (bitsize, bitnum,
1553
                                              MEM_ALIGN (xop0), maxmode,
1554
                                              MEM_VOLATILE_P (xop0));
1555
                  else
1556
                    bestmode = GET_MODE (xop0);
1557
 
1558
                  if (bestmode == VOIDmode
1559
                      || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1560
                          && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1561
                    goto extv_loses;
1562
 
1563
                  /* Compute offset as multiple of this unit,
1564
                     counting in bytes.  */
1565
                  unit = GET_MODE_BITSIZE (bestmode);
1566
                  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1567
                  xbitpos = bitnum % unit;
1568
                  xop0 = adjust_address (xop0, bestmode, xoffset);
1569
 
1570
                  /* Make sure register is big enough for the whole field. */
1571
                  if (xoffset * BITS_PER_UNIT + unit
1572
                      < offset * BITS_PER_UNIT + bitsize)
1573
                    goto extv_loses;
1574
 
1575
                  /* Fetch it to a register in that size.  */
1576
                  xop0 = force_reg (bestmode, xop0);
1577
 
1578
                  /* XBITPOS counts within UNIT, which is what is expected.  */
1579
                }
1580
              else
1581
                /* Get ref to first byte containing part of the field.  */
1582
                xop0 = adjust_address (xop0, byte_mode, xoffset);
1583
            }
1584
 
1585
          /* If op0 is a register, we need it in MAXMODE (which is usually
1586
             SImode) to make it acceptable to the format of extv.  */
1587
          if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1588
            goto extv_loses;
1589
          if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1590
            xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1591
 
1592
          /* On big-endian machines, we count bits from the most significant.
1593
             If the bit field insn does not, we must invert.  */
1594
          if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1595
            xbitpos = unit - bitsize - xbitpos;
1596
 
1597
          /* XBITPOS counts within a size of UNIT.
1598
             Adjust to count within a size of MAXMODE.  */
1599
          if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1600
            xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1601
 
1602
          unit = GET_MODE_BITSIZE (maxmode);
1603
 
1604
          if (xtarget == 0)
1605
            xtarget = xspec_target = gen_reg_rtx (tmode);
1606
 
1607
          if (GET_MODE (xtarget) != maxmode)
1608
            {
1609
              if (REG_P (xtarget))
1610
                {
1611
                  int wider = (GET_MODE_SIZE (maxmode)
1612
                               > GET_MODE_SIZE (GET_MODE (xtarget)));
1613
                  xtarget = gen_lowpart (maxmode, xtarget);
1614
                  if (wider)
1615
                    xspec_target_subreg = xtarget;
1616
                }
1617
              else
1618
                xtarget = gen_reg_rtx (maxmode);
1619
            }
1620
 
1621
          /* If this machine's extv insists on a register target,
1622
             make sure we have one.  */
1623
          if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1624
                 (xtarget, maxmode)))
1625
            xtarget = gen_reg_rtx (maxmode);
1626
 
1627
          bitsize_rtx = GEN_INT (bitsize);
1628
          bitpos_rtx = GEN_INT (xbitpos);
1629
 
1630
          pat = gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1631
          if (pat)
1632
            {
1633
              emit_insn (pat);
1634
              target = xtarget;
1635
              spec_target = xspec_target;
1636
              spec_target_subreg = xspec_target_subreg;
1637
            }
1638
          else
1639
            {
1640
              delete_insns_since (last);
1641
              target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1642
                                                bitpos, target, 0);
1643
            }
1644
        }
1645
      else
1646
      extv_loses:
1647
        target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1648
                                          bitpos, target, 0);
1649
    }
1650
  if (target == spec_target)
1651
    return target;
1652
  if (target == spec_target_subreg)
1653
    return spec_target;
1654
  if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1655
    {
1656
      /* If the target mode is not a scalar integral, first convert to the
1657
         integer mode of that size and then access it as a floating-point
1658
         value via a SUBREG.  */
1659
      if (!SCALAR_INT_MODE_P (tmode))
1660
        {
1661
          enum machine_mode smode
1662
            = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1663
          target = convert_to_mode (smode, target, unsignedp);
1664
          target = force_reg (smode, target);
1665
          return gen_lowpart (tmode, target);
1666
        }
1667
 
1668
      return convert_to_mode (tmode, target, unsignedp);
1669
    }
1670
  return target;
1671
}
1672
 
1673
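/* Illustrative sketch, not part of GCC: the "two arithmetic shifts"
   sign extension used above, applied to a host integer.  Assuming a
   field of BITSIZE bits (1 <= BITSIZE <= HOST_BITS_PER_WIDE_INT)
   already sits in the low-order bits of X, shifting left so the
   field's msb becomes the word's msb and then shifting right again
   smears the sign bit through the upper bits.  Like GCC's own host
   arithmetic, this relies on the host doing arithmetic right shifts
   of signed values.  */

static HOST_WIDE_INT
sign_extend_field_example (HOST_WIDE_INT x, int bitsize)
{
  int shift = HOST_BITS_PER_WIDE_INT - bitsize;
  return (x << shift) >> shift;
}
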
/* Extract a bit field using shifts and boolean operations.
   Returns an rtx to represent the value.
   OP0 addresses a register (word) or memory (byte).
   BITPOS says which bit within the word or byte the bit field starts in.
   OFFSET says how many bytes farther the bit field starts;
    it is 0 if OP0 is a register.
   BITSIZE says how many bits long the bit field is.
    (If OP0 is a register, it may be narrower than a full word,
     but BITPOS still counts within a full word,
     which is significant on big-endian machines.)

   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
   If TARGET is nonzero, attempts to store the value there
   and return TARGET, but this is not guaranteed.
   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */

static rtx
extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
                         unsigned HOST_WIDE_INT offset,
                         unsigned HOST_WIDE_INT bitsize,
                         unsigned HOST_WIDE_INT bitpos, rtx target,
                         int unsignedp)
{
  unsigned int total_bits = BITS_PER_WORD;
  enum machine_mode mode;

  if (GET_CODE (op0) == SUBREG || REG_P (op0))
    {
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
        return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
    }
  else
    {
      /* Get the proper mode to use for this field.  We want a mode that
         includes the entire field.  If such a mode would be larger than
         a word, we won't be doing the extraction the normal way.  */

      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
                            MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
        /* The only way this should occur is if the field spans word
           boundaries.  */
        return extract_split_bit_field (op0, bitsize,
                                        bitpos + offset * BITS_PER_UNIT,
                                        unsignedp);

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
         be in the range 0 to total_bits-1, and put any excess bytes in
         OFFSET.  */
      if (bitpos >= total_bits)
        {
          offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
          bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
                     * BITS_PER_UNIT);
        }

      /* Get ref to an aligned byte, halfword, or word containing the field.
         Adjust BITPOS to be position within a word,
         and OFFSET to be the offset of that word.
         Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = adjust_address (op0, mode, offset);
    }

  mode = GET_MODE (op0);

  if (BYTES_BIG_ENDIAN)
    /* BITPOS is the distance between our msb and that of OP0.
       Convert it to the distance from the lsb.  */
    bitpos = total_bits - bitsize - bitpos;

  /* Now BITPOS is always the distance between the field's lsb and that of OP0.
     We have reduced the big-endian case to the little-endian case.  */

  if (unsignedp)
    {
      if (bitpos)
        {
          /* If the field does not already start at the lsb,
             shift it so it does.  */
          tree amount = build_int_cst (NULL_TREE, bitpos);
          /* Maybe propagate the target for the shift.  */
          /* But not if we will return it--could confuse integrate.c.  */
          rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
          if (tmode != mode) subtarget = 0;
          op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
        }
      /* Convert the value to the desired mode.  */
      if (mode != tmode)
        op0 = convert_to_mode (tmode, op0, 1);

      /* Unless the msb of the field used to be the msb when we shifted,
         mask out the upper bits.  */

      if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
        return expand_binop (GET_MODE (op0), and_optab, op0,
                             mask_rtx (GET_MODE (op0), 0, bitsize, 0),
                             target, 1, OPTAB_LIB_WIDEN);
      return op0;
    }

  /* To extract a signed bit-field, first shift its msb to the msb of the word,
     then arithmetic-shift its lsb to the lsb of the word.  */
  op0 = force_reg (mode, op0);
  if (mode != tmode)
    target = 0;

  /* Find the narrowest integer mode that contains the field.  */

  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
       mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
      {
        op0 = convert_to_mode (mode, op0, 0);
        break;
      }

  if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
    {
      tree amount
        = build_int_cst (NULL_TREE,
                         GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
      /* Maybe propagate the target for the shift.  */
      rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
      op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
    }

  return expand_shift (RSHIFT_EXPR, mode, op0,
                       build_int_cst (NULL_TREE,
                                      GET_MODE_BITSIZE (mode) - bitsize),
                       target, 0);
}

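/* Illustrative sketch, not part of GCC: what the unsigned branch of
   extract_fixed_bit_field computes, expressed on a host word.  BITPOS
   here is the little-endian distance of the field's lsb from bit 0,
   i.e. the case the code above reduces big-endian layouts to.  */

static unsigned HOST_WIDE_INT
extract_unsigned_field_example (unsigned HOST_WIDE_INT word,
                                int bitpos, int bitsize)
{
  unsigned HOST_WIDE_INT mask
    = (bitsize < HOST_BITS_PER_WIDE_INT
       ? ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1
       : ~(unsigned HOST_WIDE_INT) 0);

  /* Shift the field down to the lsb, then mask out the upper bits.  */
  return (word >> bitpos) & mask;
}
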
/* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
   of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
   complement of that if COMPLEMENT.  The mask is truncated if
   necessary to the width of mode MODE.  The mask is zero-extended if
   BITSIZE+BITPOS is too small for MODE.  */

static rtx
mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
{
  HOST_WIDE_INT masklow, maskhigh;

  if (bitsize == 0)
    masklow = 0;
  else if (bitpos < HOST_BITS_PER_WIDE_INT)
    masklow = (HOST_WIDE_INT) -1 << bitpos;
  else
    masklow = 0;

  if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
    masklow &= ((unsigned HOST_WIDE_INT) -1
                >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));

  if (bitpos <= HOST_BITS_PER_WIDE_INT)
    maskhigh = -1;
  else
    maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);

  if (bitsize == 0)
    maskhigh = 0;
  else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
    maskhigh &= ((unsigned HOST_WIDE_INT) -1
                 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
  else
    maskhigh = 0;

  if (complement)
    {
      maskhigh = ~maskhigh;
      masklow = ~masklow;
    }

  return immed_double_const (masklow, maskhigh, mode);
}

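/* Illustrative sketch, not part of GCC: the single-host-word analogue
   of mask_rtx.  For example, bitpos = 4 and bitsize = 3 yield binary
   ...0001110000; a nonzero COMPLEMENT flips every bit.  mask_rtx does
   the same computation split across the low and high halves of a
   CONST_DOUBLE so the mask may exceed one host word.  */

static unsigned HOST_WIDE_INT
mask_value_example (int bitpos, int bitsize, int complement)
{
  unsigned HOST_WIDE_INT ones
    = (bitsize < HOST_BITS_PER_WIDE_INT
       ? ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1
       : ~(unsigned HOST_WIDE_INT) 0);
  unsigned HOST_WIDE_INT mask = ones << bitpos;

  return complement ? ~mask : mask;
}
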
/* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
   VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */

static rtx
lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
{
  unsigned HOST_WIDE_INT v = INTVAL (value);
  HOST_WIDE_INT low, high;

  if (bitsize < HOST_BITS_PER_WIDE_INT)
    v &= ~((HOST_WIDE_INT) -1 << bitsize);

  if (bitpos < HOST_BITS_PER_WIDE_INT)
    {
      low = v << bitpos;
      high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
    }
  else
    {
      low = 0;
      high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
    }

  return immed_double_const (low, high, mode);
}

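/* Illustrative sketch, not part of GCC: the single-host-word analogue
   of lshift_value, assuming BITPOS + BITSIZE fits in one host word.
   lshift_value itself also computes the bits that spill into the HIGH
   half of a CONST_DOUBLE when the shifted value straddles words.  */

static unsigned HOST_WIDE_INT
lshift_value_example (unsigned HOST_WIDE_INT v, int bitpos, int bitsize)
{
  if (bitsize < HOST_BITS_PER_WIDE_INT)
    v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;  /* truncate */
  return v << bitpos;                                  /* position */
}
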
/* Extract a bit field from a memory by forcing the alignment of the
   memory.  This is efficient only if the field spans at least 4 boundaries.

   OP0 is the MEM.
   BITSIZE is the field width; BITPOS is the position of the first bit.
   UNSIGNEDP is true if the result should be zero-extended.  */

static rtx
extract_force_align_mem_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
                                   unsigned HOST_WIDE_INT bitpos,
                                   int unsignedp)
{
  enum machine_mode mode, dmode;
  unsigned int m_bitsize, m_size;
  unsigned int sign_shift_up, sign_shift_dn;
  rtx base, a1, a2, v1, v2, comb, shift, result, start;

  /* Choose a mode that will fit BITSIZE.  */
  mode = smallest_mode_for_size (bitsize, MODE_INT);
  m_size = GET_MODE_SIZE (mode);
  m_bitsize = GET_MODE_BITSIZE (mode);

  /* Choose a mode twice as wide.  Fail if no such mode exists.  */
  dmode = mode_for_size (m_bitsize * 2, MODE_INT, false);
  if (dmode == BLKmode)
    return NULL;

  do_pending_stack_adjust ();
  start = get_last_insn ();

  /* At the end, we'll need an additional shift to deal with sign/zero
     extension.  By default this will be a left+right shift of the
     appropriate size.  But we may be able to eliminate one of them.  */
  sign_shift_up = sign_shift_dn = m_bitsize - bitsize;

  if (STRICT_ALIGNMENT)
    {
      base = plus_constant (XEXP (op0, 0), bitpos / BITS_PER_UNIT);
      bitpos %= BITS_PER_UNIT;

      /* We load two values to be concatenated.  There's an edge condition
         that bears notice -- an aligned value at the end of a page can
         only load one value lest we segfault.  So the two values we load
         are at "base & -size" and "(base + size - 1) & -size".  If base
         is unaligned, the addresses will be aligned and sequential; if
         base is aligned, the addresses will both be equal to base.  */

      a1 = expand_simple_binop (Pmode, AND, force_operand (base, NULL),
                                GEN_INT (-(HOST_WIDE_INT)m_size),
                                NULL, true, OPTAB_LIB_WIDEN);
      mark_reg_pointer (a1, m_bitsize);
      v1 = gen_rtx_MEM (mode, a1);
      set_mem_align (v1, m_bitsize);
      v1 = force_reg (mode, validize_mem (v1));

      a2 = plus_constant (base, GET_MODE_SIZE (mode) - 1);
      a2 = expand_simple_binop (Pmode, AND, force_operand (a2, NULL),
                                GEN_INT (-(HOST_WIDE_INT)m_size),
                                NULL, true, OPTAB_LIB_WIDEN);
      v2 = gen_rtx_MEM (mode, a2);
      set_mem_align (v2, m_bitsize);
      v2 = force_reg (mode, validize_mem (v2));

      /* Combine these two values into a double-word value.  */
      if (m_bitsize == BITS_PER_WORD)
        {
          comb = gen_reg_rtx (dmode);
          emit_insn (gen_rtx_CLOBBER (VOIDmode, comb));
          emit_move_insn (gen_rtx_SUBREG (mode, comb, 0), v1);
          emit_move_insn (gen_rtx_SUBREG (mode, comb, m_size), v2);
        }
      else
        {
          if (BYTES_BIG_ENDIAN)
            comb = v1, v1 = v2, v2 = comb;
          v1 = convert_modes (dmode, mode, v1, true);
          if (v1 == NULL)
            goto fail;
          v2 = convert_modes (dmode, mode, v2, true);
          v2 = expand_simple_binop (dmode, ASHIFT, v2, GEN_INT (m_bitsize),
                                    NULL, true, OPTAB_LIB_WIDEN);
          if (v2 == NULL)
            goto fail;
          comb = expand_simple_binop (dmode, IOR, v1, v2, NULL,
                                      true, OPTAB_LIB_WIDEN);
          if (comb == NULL)
            goto fail;
        }

      shift = expand_simple_binop (Pmode, AND, base, GEN_INT (m_size - 1),
                                   NULL, true, OPTAB_LIB_WIDEN);
      shift = expand_mult (Pmode, shift, GEN_INT (BITS_PER_UNIT), NULL, 1);

      if (bitpos != 0)
        {
          if (sign_shift_up <= bitpos)
            bitpos -= sign_shift_up, sign_shift_up = 0;
          shift = expand_simple_binop (Pmode, PLUS, shift, GEN_INT (bitpos),
                                       NULL, true, OPTAB_LIB_WIDEN);
        }
    }
  else
    {
      unsigned HOST_WIDE_INT offset = bitpos / BITS_PER_UNIT;
      bitpos %= BITS_PER_UNIT;

      /* When strict alignment is not required, we can just load directly
         from memory without masking.  If the remaining BITPOS offset is
         small enough, we may be able to do all operations in MODE as
         opposed to DMODE.  */
      if (bitpos + bitsize <= m_bitsize)
        dmode = mode;
      comb = adjust_address (op0, dmode, offset);

      if (sign_shift_up <= bitpos)
        bitpos -= sign_shift_up, sign_shift_up = 0;
      shift = GEN_INT (bitpos);
    }

  /* Shift down the double-word such that the requested value is at bit 0.  */
  if (shift != const0_rtx)
    comb = expand_simple_binop (dmode, unsignedp ? LSHIFTRT : ASHIFTRT,
                                comb, shift, NULL, unsignedp, OPTAB_LIB_WIDEN);
  if (comb == NULL)
    goto fail;

  /* If the field exactly matches MODE, then all we need to do is return the
     lowpart.  Otherwise, shift to get the sign bits set properly.  */
  result = force_reg (mode, gen_lowpart (mode, comb));

  if (sign_shift_up)
    result = expand_simple_binop (mode, ASHIFT, result,
                                  GEN_INT (sign_shift_up),
                                  NULL_RTX, 0, OPTAB_LIB_WIDEN);
  if (sign_shift_dn)
    result = expand_simple_binop (mode, unsignedp ? LSHIFTRT : ASHIFTRT,
                                  result, GEN_INT (sign_shift_dn),
                                  NULL_RTX, 0, OPTAB_LIB_WIDEN);

  return result;

 fail:
  delete_insns_since (start);
  return NULL;
}

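/* Illustrative sketch, not part of GCC: the address arithmetic behind
   the strict-alignment path above, for a hypothetical 4-byte MODE.
   Any byte address BASE is covered by the aligned words at
   "base & -size" and "(base + size - 1) & -size"; when BASE is already
   aligned the two coincide, so a value at the very end of a page never
   triggers a second, faulting load.  */

static void
aligned_pair_example (unsigned long base)
{
  unsigned long a1 = base & -(unsigned long) 4;
  unsigned long a2 = (base + 4 - 1) & -(unsigned long) 4;

  /* A2 is either A1 (BASE aligned) or A1 + 4 (BASE unaligned).  */
  (void) a1;
  (void) a2;
}
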
/* Extract a bit field that is split across two words
   and return an RTX for the result.

   OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
   BITSIZE is the field width; BITPOS is the position of its first bit
   within the word.
   UNSIGNEDP is 1 if we should zero-extend the contents; else sign-extend.  */

static rtx
extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
                         unsigned HOST_WIDE_INT bitpos, int unsignedp)
{
  unsigned int unit;
  unsigned int bitsdone = 0;
  rtx result = NULL_RTX;
  int first = 1;

  /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    {
      unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
      /* The force-align path is disabled by the "0 &&" below.  */
      if (0 && bitsize / unit > 2)
        {
          rtx tmp = extract_force_align_mem_bit_field (op0, bitsize, bitpos,
                                                       unsignedp);
          if (tmp)
            return tmp;
        }
    }

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
         extract_fixed_bit_field will call us again, and we will mutually
         recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      /* If OP0 is a register, then handle OFFSET here.

         When handling multiword bitfields, extract_bit_field may pass
         down a word_mode SUBREG of a larger REG for a bitfield that actually
         crosses a word boundary.  Thus, for a SUBREG, we must find
         the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
        {
          int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
          word = operand_subword_force (SUBREG_REG (op0), word_offset,
                                        GET_MODE (SUBREG_REG (op0)));
          offset = 0;
        }
      else if (REG_P (op0))
        {
          word = operand_subword_force (op0, offset, GET_MODE (op0));
          offset = 0;
        }
      else
        word = op0;

      /* Extract the parts in bit-counting order,
         whose meaning is determined by BYTES_PER_UNIT.
         OFFSET is in UNITs, and UNIT is in bits.
         extract_fixed_bit_field wants offset in bytes.  */
      part = extract_fixed_bit_field (word_mode, word,
                                      offset * unit / BITS_PER_UNIT,
                                      thissize, thispos, 0, 1);
      bitsdone += thissize;

      /* Shift this part into place for the result.  */
      if (BYTES_BIG_ENDIAN)
        {
          if (bitsize != bitsdone)
            part = expand_shift (LSHIFT_EXPR, word_mode, part,
                                 build_int_cst (NULL_TREE, bitsize - bitsdone),
                                 0, 1);
        }
      else
        {
          if (bitsdone != thissize)
            part = expand_shift (LSHIFT_EXPR, word_mode, part,
                                 build_int_cst (NULL_TREE,
                                                bitsdone - thissize), 0, 1);
        }

      if (first)
        result = part;
      else
        /* Combine the parts with bitwise or.  This works
           because we extracted each part as an unsigned bit field.  */
        result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
                               OPTAB_LIB_WIDEN);

      first = 0;
    }

  /* Unsigned bit field: we are done.  */
  if (unsignedp)
    return result;
  /* Signed bit field: sign-extend with two arithmetic shifts.  */
  result = expand_shift (LSHIFT_EXPR, word_mode, result,
                         build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
                         NULL_RTX, 0);
  return expand_shift (RSHIFT_EXPR, word_mode, result,
                       build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
                       NULL_RTX, 0);
}

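/* Illustrative sketch, not part of GCC: how the loop above assembles a
   split field on a host word.  Each part is extracted zero-extended,
   shifted to its final position and IORed in; the IOR is safe exactly
   because every part was extracted as an unsigned bit field.  This
   assumes little-endian part ordering, with LO_BITS nonzero bits
   supplied by LO_PART.  */

static unsigned HOST_WIDE_INT
combine_split_parts_example (unsigned HOST_WIDE_INT lo_part,
                             unsigned HOST_WIDE_INT hi_part,
                             int lo_bits)
{
  return lo_part | (hi_part << lo_bits);
}
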
/* Add INC into TARGET.  */

void
expand_inc (rtx target, rtx inc)
{
  rtx value = expand_binop (GET_MODE (target), add_optab,
                            target, inc,
                            target, 0, OPTAB_LIB_WIDEN);
  if (value != target)
    emit_move_insn (target, value);
}

/* Subtract DEC from TARGET.  */

void
expand_dec (rtx target, rtx dec)
{
  rtx value = expand_binop (GET_MODE (target), sub_optab,
                            target, dec,
                            target, 0, OPTAB_LIB_WIDEN);
  if (value != target)
    emit_move_insn (target, value);
}

/* Output a shift instruction for expression code CODE,
   with SHIFTED being the rtx for the value to shift,
   and AMOUNT the tree for the amount to shift by.
   Store the result in the rtx TARGET, if that is convenient.
   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
   Return the rtx for where the value is.  */

rtx
expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
              tree amount, rtx target, int unsignedp)
{
  rtx op1, temp = 0;
  int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
  int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
  int try;

  /* Previously detected shift-counts computed by NEGATE_EXPR
     and shifted in the other direction; but that does not work
     on all machines.  */

  op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);

  if (SHIFT_COUNT_TRUNCATED)
    {
      if (GET_CODE (op1) == CONST_INT
          && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
              (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
        op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
                       % GET_MODE_BITSIZE (mode));
      else if (GET_CODE (op1) == SUBREG
               && subreg_lowpart_p (op1))
        op1 = SUBREG_REG (op1);
    }

  if (op1 == const0_rtx)
    return shifted;

  /* Check whether it's cheaper to implement a left shift by a constant
     bit count by a sequence of additions.  */
  if (code == LSHIFT_EXPR
      && GET_CODE (op1) == CONST_INT
      && INTVAL (op1) > 0
      && INTVAL (op1) < GET_MODE_BITSIZE (mode)
      && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode])
    {
      int i;
      for (i = 0; i < INTVAL (op1); i++)
        {
          temp = force_reg (mode, shifted);
          shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
                                  unsignedp, OPTAB_LIB_WIDEN);
        }
      return shifted;
    }

  for (try = 0; temp == 0 && try < 3; try++)
    {
      enum optab_methods methods;

      if (try == 0)
        methods = OPTAB_DIRECT;
      else if (try == 1)
        methods = OPTAB_WIDEN;
      else
        methods = OPTAB_LIB_WIDEN;

      if (rotate)
        {
          /* Widening does not work for rotation.  */
          if (methods == OPTAB_WIDEN)
            continue;
          else if (methods == OPTAB_LIB_WIDEN)
            {
              /* If we have been unable to open-code this by a rotation,
                 do it as the IOR of two shifts.  I.e., to rotate A
                 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
                 where C is the bitsize of A.

                 It is theoretically possible that the target machine might
                 not be able to perform either shift and hence we would
                 be making two libcalls rather than just the one for the
                 shift (similarly if IOR could not be done).  We will allow
                 this extremely unlikely lossage to avoid complicating the
                 code below.  */

              rtx subtarget = target == shifted ? 0 : target;
              rtx temp1;
              tree type = TREE_TYPE (amount);
              tree new_amount = make_tree (type, op1);
              tree other_amount
                = fold_build2 (MINUS_EXPR, type,
                               build_int_cst (type, GET_MODE_BITSIZE (mode)),
                               amount);

              shifted = force_reg (mode, shifted);

              temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
                                   mode, shifted, new_amount, 0, 1);
              temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
                                    mode, shifted, other_amount, subtarget, 1);
              return expand_binop (mode, ior_optab, temp, temp1, target,
                                   unsignedp, methods);
            }

          temp = expand_binop (mode,
                               left ? rotl_optab : rotr_optab,
                               shifted, op1, target, unsignedp, methods);
        }
      else if (unsignedp)
        temp = expand_binop (mode,
                             left ? ashl_optab : lshr_optab,
                             shifted, op1, target, unsignedp, methods);

      /* Do arithmetic shifts.
         Also, if we are going to widen the operand, we can just as well
         use an arithmetic right-shift instead of a logical one.  */
      if (temp == 0 && ! rotate
          && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
        {
          enum optab_methods methods1 = methods;

          /* If trying to widen a log shift to an arithmetic shift,
             don't accept an arithmetic shift of the same size.  */
          if (unsignedp)
            methods1 = OPTAB_MUST_WIDEN;

          /* Arithmetic shift */

          temp = expand_binop (mode,
                               left ? ashl_optab : ashr_optab,
                               shifted, op1, target, unsignedp, methods1);
        }

      /* We used to try extzv here for logical right shifts, but that was
         only useful for one machine, the VAX, and caused poor code
         generation there for lshrdi3, so the code was deleted and a
         define_expand for lshrsi3 was added to vax.md.  */
    }

  gcc_assert (temp);
  return temp;
}

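/* Illustrative sketch, not part of GCC: the IOR-of-two-shifts rotate
   fallback described in expand_shift above, on a host word.  N must be
   nonzero -- expand_shift has already returned early for a zero count,
   and shifting by the full word width is undefined in C.  */

static unsigned HOST_WIDE_INT
rotate_left_example (unsigned HOST_WIDE_INT a, int n)
{
  return (a << n) | (a >> (HOST_BITS_PER_WIDE_INT - n));
}
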
enum alg_code {
  alg_unknown,
  alg_zero,
  alg_m, alg_shift,
  alg_add_t_m2,
  alg_sub_t_m2,
  alg_add_factor,
  alg_sub_factor,
  alg_add_t2_m,
  alg_sub_t2_m,
  alg_impossible
};

/* This structure holds the "cost" of a multiply sequence.  The
   "cost" field holds the total rtx_cost of every operator in the
   synthetic multiplication sequence, hence cost(a op b) is defined
   as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
   The "latency" field holds the minimum possible latency of the
   synthetic multiply, on a hypothetical infinitely parallel CPU.
   This is the critical path, or the maximum height, of the expression
   tree which is the sum of rtx_costs on the most expensive path from
   any leaf to the root.  Hence latency(a op b) is defined as zero for
   leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise.  */

struct mult_cost {
  short cost;     /* Total rtx_cost of the multiplication sequence.  */
  short latency;  /* The latency of the multiplication sequence.  */
};

/* This macro is used to compare a pointer to a mult_cost against a
   single integer "rtx_cost" value.  This is equivalent to the macro
   CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}.  */
#define MULT_COST_LESS(X,Y) ((X)->cost < (Y)    \
                             || ((X)->cost == (Y) && (X)->latency < (Y)))

/* This macro is used to compare two pointers to mult_costs against
   each other.  The macro returns true if X is cheaper than Y.
   Currently, the cheaper of two mult_costs is the one with the
   lower "cost".  If "cost"s are tied, the lower latency is cheaper.  */
#define CHEAPER_MULT_COST(X,Y)  ((X)->cost < (Y)->cost          \
                                 || ((X)->cost == (Y)->cost     \
                                     && (X)->latency < (Y)->latency))

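/* Illustrative sketch, not part of GCC: how the macros above order two
   synthetic-multiply costs.  Total rtx_cost decides; latency is only
   the tie-breaker.  */

static int
cheaper_mult_cost_example (void)
{
  struct mult_cost a = { 4, 3 };  /* cost 4, latency 3 */
  struct mult_cost b = { 4, 5 };  /* cost 4, latency 5 */

  /* Equal cost, lower latency: A is cheaper, so this returns true.  */
  return CHEAPER_MULT_COST (&a, &b);
}
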
/* This structure records a sequence of operations.
   `ops' is the number of operations recorded.
   `cost' is their total cost.
   The operations are stored in `op' and the corresponding
   logarithms of the integer coefficients in `log'.

   These are the operations:
   alg_zero             total := 0;
   alg_m                total := multiplicand;
   alg_shift            total := total * coeff;
   alg_add_t_m2         total := total + multiplicand * coeff;
   alg_sub_t_m2         total := total - multiplicand * coeff;
   alg_add_factor       total := total * coeff + total;
   alg_sub_factor       total := total * coeff - total;
   alg_add_t2_m         total := total * coeff + multiplicand;
   alg_sub_t2_m         total := total * coeff - multiplicand;

   The first operand must be either alg_zero or alg_m.  */

struct algorithm
{
  struct mult_cost cost;
  short ops;
  /* The size of the OP and LOG fields are not directly related to the
     word size, but the worst-case algorithms will be if we have few
     consecutive ones or zeros, i.e., a multiplicand like 10101010101...
     In that case we will generate shift-by-2, add, shift-by-2, add,...,
     in total wordsize operations.  */
  enum alg_code op[MAX_BITS_PER_WORD];
  char log[MAX_BITS_PER_WORD];
};

/* The entry for our multiplication cache/hash table.  */
struct alg_hash_entry {
  /* The number we are multiplying by.  */
  unsigned int t;

  /* The mode in which we are multiplying something by T.  */
  enum machine_mode mode;

  /* The best multiplication algorithm for t.  */
  enum alg_code alg;

  /* The cost of multiplication if ALG_CODE is not alg_impossible.
     Otherwise, the cost within which multiplication by T is
     impossible.  */
  struct mult_cost cost;
};

/* The number of cache/hash entries.  */
#define NUM_ALG_HASH_ENTRIES 307

/* Each entry of ALG_HASH caches alg_code for some integer.  This is
   actually a hash table.  If we have a collision, the older entry is
   kicked out.  */
static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];

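/* Illustrative sketch, not part of GCC: the direct-mapped cache index
   used for ALG_HASH, mirroring the computation in synth_mult below.  A
   collision simply overwrites the older entry; the cache only affects
   speed, never correctness.  */

static int
alg_hash_index_example (unsigned HOST_WIDE_INT t, enum machine_mode mode)
{
  return (int) ((t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES);
}
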
/* Indicates the type of fixup needed after a constant multiplication.
2412
   BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2413
   the result should be negated, and ADD_VARIANT means that the
2414
   multiplicand should be added to the result.  */
2415
enum mult_variant {basic_variant, negate_variant, add_variant};
2416
 
2417
static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2418
                        const struct mult_cost *, enum machine_mode mode);
2419
static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2420
                                 struct algorithm *, enum mult_variant *, int);
2421
static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2422
                              const struct algorithm *, enum mult_variant);
2423
static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2424
                                                 int, rtx *, int *, int *);
2425
static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2426
static rtx extract_high_half (enum machine_mode, rtx);
2427
static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2428
static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2429
                                       int, int);
2430
/* Compute and return the best algorithm for multiplying by T.
2431
   The algorithm must cost less than cost_limit
2432
   If retval.cost >= COST_LIMIT, no algorithm was found and all
2433
   other field of the returned struct are undefined.
2434
   MODE is the machine mode of the multiplication.  */
2435
 
2436
static void
2437
synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2438
            const struct mult_cost *cost_limit, enum machine_mode mode)
2439
{
2440
  int m;
2441
  struct algorithm *alg_in, *best_alg;
2442
  struct mult_cost best_cost;
2443
  struct mult_cost new_limit;
2444
  int op_cost, op_latency;
2445
  unsigned HOST_WIDE_INT q;
2446
  int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2447
  int hash_index;
2448
  bool cache_hit = false;
2449
  enum alg_code cache_alg = alg_zero;
2450
 
2451
  /* Indicate that no algorithm is yet found.  If no algorithm
2452
     is found, this value will be returned and indicate failure.  */
2453
  alg_out->cost.cost = cost_limit->cost + 1;
2454
  alg_out->cost.latency = cost_limit->latency + 1;
2455
 
2456
  if (cost_limit->cost < 0
2457
      || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2458
    return;
2459
 
2460
  /* Restrict the bits of "t" to the multiplication's mode.  */
2461
  t &= GET_MODE_MASK (mode);
2462
 
2463
  /* t == 1 can be done in zero cost.  */
2464
  if (t == 1)
2465
    {
2466
      alg_out->ops = 1;
2467
      alg_out->cost.cost = 0;
2468
      alg_out->cost.latency = 0;
2469
      alg_out->op[0] = alg_m;
2470
      return;
2471
    }
2472
 
2473
  /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2474
     fail now.  */
2475
  if (t == 0)
2476
    {
2477
      if (MULT_COST_LESS (cost_limit, zero_cost))
2478
        return;
2479
      else
2480
        {
2481
          alg_out->ops = 1;
2482
          alg_out->cost.cost = zero_cost;
2483
          alg_out->cost.latency = zero_cost;
2484
          alg_out->op[0] = alg_zero;
2485
          return;
2486
        }
2487
    }
2488
 
2489
  /* We'll be needing a couple extra algorithm structures now.  */
2490
 
2491
  alg_in = alloca (sizeof (struct algorithm));
2492
  best_alg = alloca (sizeof (struct algorithm));
2493
  best_cost = *cost_limit;
2494
 
2495
  /* Compute the hash index.  */
2496
  hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES;
2497
 
2498
  /* See if we already know what to do for T.  */
2499
  if (alg_hash[hash_index].t == t
2500
      && alg_hash[hash_index].mode == mode
2501
      && alg_hash[hash_index].alg != alg_unknown)
2502
    {
2503
      cache_alg = alg_hash[hash_index].alg;
2504
 
2505
      if (cache_alg == alg_impossible)
2506
        {
2507
          /* The cache tells us that it's impossible to synthesize
2508
             multiplication by T within alg_hash[hash_index].cost.  */
2509
          if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2510
            /* COST_LIMIT is at least as restrictive as the one
2511
               recorded in the hash table, in which case we have no
2512
               hope of synthesizing a multiplication.  Just
2513
               return.  */
2514
            return;
2515
 
2516
          /* If we get here, COST_LIMIT is less restrictive than the
2517
             one recorded in the hash table, so we may be able to
2518
             synthesize a multiplication.  Proceed as if we didn't
2519
             have the cache entry.  */
2520
        }
2521
      else
2522
        {
2523
          if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2524
            /* The cached algorithm shows that this multiplication
2525
               requires more cost than COST_LIMIT.  Just return.  This
2526
               way, we don't clobber this cache entry with
2527
               alg_impossible but retain useful information.  */
2528
            return;
2529
 
2530
          cache_hit = true;
2531
 
2532
          switch (cache_alg)
2533
            {
2534
            case alg_shift:
2535
              goto do_alg_shift;
2536
 
2537
            case alg_add_t_m2:
2538
            case alg_sub_t_m2:
2539
              goto do_alg_addsub_t_m2;
2540
 
2541
            case alg_add_factor:
2542
            case alg_sub_factor:
2543
              goto do_alg_addsub_factor;
2544
 
2545
            case alg_add_t2_m:
2546
              goto do_alg_add_t2_m;
2547
 
2548
            case alg_sub_t2_m:
2549
              goto do_alg_sub_t2_m;
2550
 
2551
            default:
2552
              gcc_unreachable ();
2553
            }
2554
        }
2555
    }
2556
 
2557
  /* If we have a group of zero bits at the low-order part of T, try
2558
     multiplying by the remaining bits and then doing a shift.  */
2559
 
2560
  if ((t & 1) == 0)
2561
    {
2562
    do_alg_shift:
2563
      m = floor_log2 (t & -t);  /* m = number of low zero bits */
2564
      if (m < maxm)
2565
        {
2566
          q = t >> m;
2567
          /* The function expand_shift will choose between a shift and
2568
             a sequence of additions, so the observed cost is given as
2569
             MIN (m * add_cost[mode], shift_cost[mode][m]).  */
2570
          op_cost = m * add_cost[mode];
2571
          if (shift_cost[mode][m] < op_cost)
2572
            op_cost = shift_cost[mode][m];
2573
          new_limit.cost = best_cost.cost - op_cost;
2574
          new_limit.latency = best_cost.latency - op_cost;
2575
          synth_mult (alg_in, q, &new_limit, mode);
2576
 
2577
          alg_in->cost.cost += op_cost;
2578
          alg_in->cost.latency += op_cost;
2579
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2580
            {
2581
              struct algorithm *x;
2582
              best_cost = alg_in->cost;
2583
              x = alg_in, alg_in = best_alg, best_alg = x;
2584
              best_alg->log[best_alg->ops] = m;
2585
              best_alg->op[best_alg->ops] = alg_shift;
2586
            }
2587
        }
2588
      if (cache_hit)
2589
        goto done;
2590
    }
2591
 
2592
  /* If we have an odd number, add or subtract one.  */
2593
  if ((t & 1) != 0)
2594
    {
2595
      unsigned HOST_WIDE_INT w;
2596
 
2597
    do_alg_addsub_t_m2:
2598
      for (w = 1; (w & t) != 0; w <<= 1)
2599
        ;
2600
      /* If T was -1, then W will be zero after the loop.  This is another
2601
         case where T ends with ...111.  Handling this with (T + 1) and
2602
         subtract 1 produces slightly better code and results in algorithm
2603
         selection much faster than treating it like the ...0111 case
2604
         below.  */
2605
      if (w == 0
2606
          || (w > 2
2607
              /* Reject the case where t is 3.
2608
                 Thus we prefer addition in that case.  */
2609
              && t != 3))
2610
        {
2611
          /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2612
 
2613
          op_cost = add_cost[mode];
2614
          new_limit.cost = best_cost.cost - op_cost;
2615
          new_limit.latency = best_cost.latency - op_cost;
2616
          synth_mult (alg_in, t + 1, &new_limit, mode);
2617
 
2618
          alg_in->cost.cost += op_cost;
2619
          alg_in->cost.latency += op_cost;
2620
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2621
            {
2622
              struct algorithm *x;
2623
              best_cost = alg_in->cost;
2624
              x = alg_in, alg_in = best_alg, best_alg = x;
2625
              best_alg->log[best_alg->ops] = 0;
2626
              best_alg->op[best_alg->ops] = alg_sub_t_m2;
2627
            }
2628
        }
2629
      else
2630
        {
2631
          /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2632
 
2633
          op_cost = add_cost[mode];
2634
          new_limit.cost = best_cost.cost - op_cost;
2635
          new_limit.latency = best_cost.latency - op_cost;
2636
          synth_mult (alg_in, t - 1, &new_limit, mode);
2637
 
2638
          alg_in->cost.cost += op_cost;
2639
          alg_in->cost.latency += op_cost;
2640
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2641
            {
2642
              struct algorithm *x;
2643
              best_cost = alg_in->cost;
2644
              x = alg_in, alg_in = best_alg, best_alg = x;
2645
              best_alg->log[best_alg->ops] = 0;
2646
              best_alg->op[best_alg->ops] = alg_add_t_m2;
2647
            }
2648
        }
2649
      if (cache_hit)
2650
        goto done;
2651
    }
2652
 
2653
  /* Look for factors of t of the form
2654
     t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2655
     If we find such a factor, we can multiply by t using an algorithm that
2656
     multiplies by q, shift the result by m and add/subtract it to itself.
2657
 
2658
     We search for large factors first and loop down, even if large factors
2659
     are less probable than small; if we find a large factor we will find a
2660
     good sequence quickly, and therefore be able to prune (by decreasing
2661
     COST_LIMIT) the search.  */

 do_alg_addsub_factor:
  for (m = floor_log2 (t - 1); m >= 2; m--)
    {
      unsigned HOST_WIDE_INT d;

      d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
      if (t % d == 0 && t > d && m < maxm
          && (!cache_hit || cache_alg == alg_add_factor))
        {
          /* If the target has a cheap shift-and-add instruction, use
             that in preference to a shift insn followed by an add insn.
             Assume that the shift-and-add is "atomic" with a latency
             equal to its cost, otherwise assume that on superscalar
             hardware the shift may be executed concurrently with the
             earlier steps in the algorithm.  */
          op_cost = add_cost[mode] + shift_cost[mode][m];
          if (shiftadd_cost[mode][m] < op_cost)
            {
              op_cost = shiftadd_cost[mode][m];
              op_latency = op_cost;
            }
          else
            op_latency = add_cost[mode];

          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_latency;
          synth_mult (alg_in, t / d, &new_limit, mode);

          alg_in->cost.cost += op_cost;
          alg_in->cost.latency += op_latency;
          if (alg_in->cost.latency < op_cost)
            alg_in->cost.latency = op_cost;
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              struct algorithm *x;
              best_cost = alg_in->cost;
              x = alg_in, alg_in = best_alg, best_alg = x;
              best_alg->log[best_alg->ops] = m;
              best_alg->op[best_alg->ops] = alg_add_factor;
            }
          /* Other factors will have been taken care of in the recursion.  */
          break;
        }

      d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
      if (t % d == 0 && t > d && m < maxm
          && (!cache_hit || cache_alg == alg_sub_factor))
        {
          /* If the target has a cheap shift-and-subtract insn, use
             that in preference to a shift insn followed by a sub insn.
             Assume that the shift-and-sub is "atomic" with a latency
             equal to its cost, otherwise assume that on superscalar
             hardware the shift may be executed concurrently with the
             earlier steps in the algorithm.  */
          op_cost = add_cost[mode] + shift_cost[mode][m];
          if (shiftsub_cost[mode][m] < op_cost)
            {
              op_cost = shiftsub_cost[mode][m];
              op_latency = op_cost;
            }
          else
            op_latency = add_cost[mode];

          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_latency;
          synth_mult (alg_in, t / d, &new_limit, mode);

          alg_in->cost.cost += op_cost;
          alg_in->cost.latency += op_latency;
          if (alg_in->cost.latency < op_cost)
            alg_in->cost.latency = op_cost;
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              struct algorithm *x;
              best_cost = alg_in->cost;
              x = alg_in, alg_in = best_alg, best_alg = x;
              best_alg->log[best_alg->ops] = m;
              best_alg->op[best_alg->ops] = alg_sub_factor;
            }
          break;
        }
    }
  if (cache_hit)
    goto done;

  /* Try shift-and-add (load effective address) instructions,
     i.e. do a*3, a*5, a*9.  */
  if ((t & 1) != 0)
    {
    do_alg_add_t2_m:
      q = t - 1;
      q = q & -q;
      m = exact_log2 (q);
      if (m >= 0 && m < maxm)
        {
          op_cost = shiftadd_cost[mode][m];
          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_cost;
          synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);

          alg_in->cost.cost += op_cost;
          alg_in->cost.latency += op_cost;
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              struct algorithm *x;
              best_cost = alg_in->cost;
              x = alg_in, alg_in = best_alg, best_alg = x;
              best_alg->log[best_alg->ops] = m;
              best_alg->op[best_alg->ops] = alg_add_t2_m;
            }
        }
      if (cache_hit)
        goto done;

    do_alg_sub_t2_m:
      q = t + 1;
      q = q & -q;
      m = exact_log2 (q);
      if (m >= 0 && m < maxm)
        {
          op_cost = shiftsub_cost[mode][m];
          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_cost;
          synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);

          alg_in->cost.cost += op_cost;
          alg_in->cost.latency += op_cost;
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              struct algorithm *x;
              best_cost = alg_in->cost;
              x = alg_in, alg_in = best_alg, best_alg = x;
              best_alg->log[best_alg->ops] = m;
              best_alg->op[best_alg->ops] = alg_sub_t2_m;
            }
        }
      if (cache_hit)
        goto done;
    }

 done:
  /* If best_cost has not decreased, we have not found any algorithm.  */
  if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
    {
      /* We failed to find an algorithm.  Record alg_impossible for
         this case (that is, <T, MODE, COST_LIMIT>) so that next time
         we are asked to find an algorithm for T within the same or
         lower COST_LIMIT, we can immediately return to the
         caller.  */
      alg_hash[hash_index].t = t;
      alg_hash[hash_index].mode = mode;
      alg_hash[hash_index].alg = alg_impossible;
      alg_hash[hash_index].cost = *cost_limit;
      return;
    }

  /* Cache the result.  */
  if (!cache_hit)
    {
      alg_hash[hash_index].t = t;
      alg_hash[hash_index].mode = mode;
      alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
      alg_hash[hash_index].cost.cost = best_cost.cost;
      alg_hash[hash_index].cost.latency = best_cost.latency;
    }

  /* If we are getting too long a sequence for `struct algorithm'
     to record, make this search fail.  */
  if (best_alg->ops == MAX_BITS_PER_WORD)
    return;

  /* Copy the algorithm from temporary space to the space at alg_out.
     We avoid using structure assignment because the majority of
     best_alg is normally undefined, and this is a critical function.  */
  alg_out->ops = best_alg->ops + 1;
  alg_out->cost = best_cost;
  memcpy (alg_out->op, best_alg->op,
          alg_out->ops * sizeof *alg_out->op);
  memcpy (alg_out->log, best_alg->log,
          alg_out->ops * sizeof *alg_out->log);
}
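
/* Host-side illustration (not part of the original source) of the kind
   of plan synth_mult records: 45 = 5 * 9 = (2^2 + 1)(2^3 + 1), i.e.
   alg_m followed by two alg_add_factor steps, which expand_mult_const
   below turns into exactly this shift-add sequence.  */
static unsigned int
mult45_sketch (unsigned int x)
{
  unsigned int accum = x;            /* alg_m:          val_so_far = 1  */
  accum = (accum << 2) + accum;      /* alg_add_factor: val_so_far = 5  */
  accum = (accum << 3) + accum;      /* alg_add_factor: val_so_far = 45 */
  return accum;                      /* two shift-adds instead of a mul */
}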

/* Find the cheapest way of multiplying a value of mode MODE by VAL.
   Try three variations:

       - a shift/add sequence based on VAL itself
       - a shift/add sequence based on -VAL, followed by a negation
       - a shift/add sequence based on VAL - 1, followed by an addition.

   Return true if the cheapest of these costs less than MULT_COST,
   describing the algorithm in *ALG and final fixup in *VARIANT.  */

static bool
choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
                     struct algorithm *alg, enum mult_variant *variant,
                     int mult_cost)
{
  struct algorithm alg2;
  struct mult_cost limit;
  int op_cost;

  /* Fail quickly for impossible bounds.  */
  if (mult_cost < 0)
    return false;

  /* Ensure that mult_cost provides a reasonable upper bound.
     Any constant multiplication can be performed with fewer
     than 2 * bits additions.  */
  op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[mode];
  if (mult_cost > op_cost)
    mult_cost = op_cost;

  *variant = basic_variant;
  limit.cost = mult_cost;
  limit.latency = mult_cost;
  synth_mult (alg, val, &limit, mode);

  /* This works only if the inverted value actually fits in an
     `unsigned int'.  */
  if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
    {
      op_cost = neg_cost[mode];
      if (MULT_COST_LESS (&alg->cost, mult_cost))
        {
          limit.cost = alg->cost.cost - op_cost;
          limit.latency = alg->cost.latency - op_cost;
        }
      else
        {
          limit.cost = mult_cost - op_cost;
          limit.latency = mult_cost - op_cost;
        }

      synth_mult (&alg2, -val, &limit, mode);
      alg2.cost.cost += op_cost;
      alg2.cost.latency += op_cost;
      if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
        *alg = alg2, *variant = negate_variant;
    }

  /* This proves very useful for division-by-constant.  */
  op_cost = add_cost[mode];
  if (MULT_COST_LESS (&alg->cost, mult_cost))
    {
      limit.cost = alg->cost.cost - op_cost;
      limit.latency = alg->cost.latency - op_cost;
    }
  else
    {
      limit.cost = mult_cost - op_cost;
      limit.latency = mult_cost - op_cost;
    }

  synth_mult (&alg2, val - 1, &limit, mode);
  alg2.cost.cost += op_cost;
  alg2.cost.latency += op_cost;
  if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
    *alg = alg2, *variant = add_variant;

  return MULT_COST_LESS (&alg->cost, mult_cost);
}
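
/* Illustrative note (not from the original source): for VAL = -5 the
   unsigned bit pattern of VAL synthesizes poorly, while -VAL = 5
   = 2^2 + 1 is a single shift-add, so the negate_variant -- synthesize
   x * 5, then negate -- wins.  The add_variant, based on VAL - 1 plus
   one final addition of the operand, is the form the
   division-by-constant code below relies on.  */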

/* A subroutine of expand_mult, used for constant multiplications.
   Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
   convenient.  Use the shift/add sequence described by ALG and apply
   the final fixup specified by VARIANT.  */

static rtx
expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
                   rtx target, const struct algorithm *alg,
                   enum mult_variant variant)
{
  HOST_WIDE_INT val_so_far;
  rtx insn, accum, tem;
  int opno;
  enum machine_mode nmode;

  /* Avoid referencing memory over and over.
     For speed, but also for correctness when mem is volatile.  */
  if (MEM_P (op0))
    op0 = force_reg (mode, op0);

  /* ACCUM starts out either as OP0 or as a zero, depending on
     the first operation.  */

  if (alg->op[0] == alg_zero)
    {
      accum = copy_to_mode_reg (mode, const0_rtx);
      val_so_far = 0;
    }
  else if (alg->op[0] == alg_m)
    {
      accum = copy_to_mode_reg (mode, op0);
      val_so_far = 1;
    }
  else
    gcc_unreachable ();

  for (opno = 1; opno < alg->ops; opno++)
    {
      int log = alg->log[opno];
      rtx shift_subtarget = optimize ? 0 : accum;
      rtx add_target
        = (opno == alg->ops - 1 && target != 0 && variant != add_variant
           && !optimize)
          ? target : 0;
      rtx accum_target = optimize ? 0 : accum;

      switch (alg->op[opno])
        {
        case alg_shift:
          accum = expand_shift (LSHIFT_EXPR, mode, accum,
                                build_int_cst (NULL_TREE, log),
                                NULL_RTX, 0);
          val_so_far <<= log;
          break;

        case alg_add_t_m2:
          tem = expand_shift (LSHIFT_EXPR, mode, op0,
                              build_int_cst (NULL_TREE, log),
                              NULL_RTX, 0);
          accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
                                 add_target ? add_target : accum_target);
          val_so_far += (HOST_WIDE_INT) 1 << log;
          break;

        case alg_sub_t_m2:
          tem = expand_shift (LSHIFT_EXPR, mode, op0,
                              build_int_cst (NULL_TREE, log),
                              NULL_RTX, 0);
          accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
                                 add_target ? add_target : accum_target);
          val_so_far -= (HOST_WIDE_INT) 1 << log;
          break;

        case alg_add_t2_m:
          accum = expand_shift (LSHIFT_EXPR, mode, accum,
                                build_int_cst (NULL_TREE, log),
                                shift_subtarget,
                                0);
          accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
                                 add_target ? add_target : accum_target);
          val_so_far = (val_so_far << log) + 1;
          break;

        case alg_sub_t2_m:
          accum = expand_shift (LSHIFT_EXPR, mode, accum,
                                build_int_cst (NULL_TREE, log),
                                shift_subtarget, 0);
          accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
                                 add_target ? add_target : accum_target);
          val_so_far = (val_so_far << log) - 1;
          break;

        case alg_add_factor:
          tem = expand_shift (LSHIFT_EXPR, mode, accum,
                              build_int_cst (NULL_TREE, log),
                              NULL_RTX, 0);
          accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
                                 add_target ? add_target : accum_target);
          val_so_far += val_so_far << log;
          break;

        case alg_sub_factor:
          tem = expand_shift (LSHIFT_EXPR, mode, accum,
                              build_int_cst (NULL_TREE, log),
                              NULL_RTX, 0);
          accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
                                 (add_target
                                  ? add_target : (optimize ? 0 : tem)));
          val_so_far = (val_so_far << log) - val_so_far;
          break;

        default:
          gcc_unreachable ();
        }

      /* Write a REG_EQUAL note on the last insn so that we can cse
         multiplication sequences.  Note that if ACCUM is a SUBREG,
         we've set the inner register and must properly indicate
         that.  */

      tem = op0, nmode = mode;
      if (GET_CODE (accum) == SUBREG)
        {
          nmode = GET_MODE (SUBREG_REG (accum));
          tem = gen_lowpart (nmode, op0);
        }

      insn = get_last_insn ();
      set_unique_reg_note (insn, REG_EQUAL,
                           gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
    }

  if (variant == negate_variant)
    {
      val_so_far = -val_so_far;
      accum = expand_unop (mode, neg_optab, accum, target, 0);
    }
  else if (variant == add_variant)
    {
      val_so_far = val_so_far + 1;
      accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
    }

  /* Compare only the bits of val and val_so_far that are significant
     in the result mode, to avoid sign-/zero-extension confusion.  */
  val &= GET_MODE_MASK (mode);
  val_so_far &= GET_MODE_MASK (mode);
  gcc_assert (val == val_so_far);

  return accum;
}

/* Perform a multiplication and return an rtx for the result.
   MODE is the mode of the value; OP0 and OP1 are what to multiply (rtx's);
   TARGET is a suggestion for where to store the result (an rtx).

   We check specially for a constant integer as OP1.
   If you want this check for OP0 as well, then before calling
   you should swap the two operands if OP0 would be constant.  */

rtx
expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
             int unsignedp)
{
  enum mult_variant variant;
  struct algorithm algorithm;
  int max_cost;

  /* Handling const0_rtx here allows us to use zero as a rogue value for
     coeff below.  */
  if (op1 == const0_rtx)
    return const0_rtx;
  if (op1 == const1_rtx)
    return op0;
  if (op1 == constm1_rtx)
    return expand_unop (mode,
                        GET_MODE_CLASS (mode) == MODE_INT
                        && !unsignedp && flag_trapv
                        ? negv_optab : neg_optab,
                        op0, target, 0);

  /* These are the operations that are potentially turned into a sequence
     of shifts and additions.  */
  if (SCALAR_INT_MODE_P (mode)
      && (unsignedp || !flag_trapv))
    {
      HOST_WIDE_INT coeff = 0;
      rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);

      /* synth_mult does an `unsigned int' multiply.  As long as the mode is
         less than or equal in size to `unsigned int' this doesn't matter.
         If the mode is larger than `unsigned int', then synth_mult works
         only if the constant value exactly fits in an `unsigned int' without
         any truncation.  This means that multiplying by negative values does
         not work; results are off by 2^32 on a 32-bit machine.  */

      if (GET_CODE (op1) == CONST_INT)
        {
          /* Attempt to handle multiplication of DImode values by negative
             coefficients, by performing the multiplication by a positive
             multiplier and then inverting the result.  */
          if (INTVAL (op1) < 0
              && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
            {
              /* It's safe to use -INTVAL (op1) even for INT_MIN, as the
                 result is interpreted as an unsigned coefficient.
                 Exclude cost of op0 from max_cost to match the cost
                 calculation of the synth_mult.  */
              max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET)
                         - neg_cost[mode];
              if (max_cost > 0
                  && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
                                          &variant, max_cost))
                {
                  rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
                                                NULL_RTX, &algorithm,
                                                variant);
                  return expand_unop (mode, neg_optab, temp, target, 0);
                }
            }
          else coeff = INTVAL (op1);
        }
      else if (GET_CODE (op1) == CONST_DOUBLE)
        {
          /* If we are multiplying in DImode, it may still be a win
             to try to work with shifts and adds.  */
          if (CONST_DOUBLE_HIGH (op1) == 0)
            coeff = CONST_DOUBLE_LOW (op1);
          else if (CONST_DOUBLE_LOW (op1) == 0
                   && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
            {
              int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
                          + HOST_BITS_PER_WIDE_INT;
              return expand_shift (LSHIFT_EXPR, mode, op0,
                                   build_int_cst (NULL_TREE, shift),
                                   target, unsignedp);
            }
        }

      /* We used to test optimize here, on the grounds that it's better to
         produce a smaller program when -O is not used.  But this causes
         such a terrible slowdown sometimes that it seems better to always
         use synth_mult.  */
      if (coeff != 0)
        {
          /* Special case powers of two.  */
          if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
            return expand_shift (LSHIFT_EXPR, mode, op0,
                                 build_int_cst (NULL_TREE, floor_log2 (coeff)),
                                 target, unsignedp);

          /* Exclude cost of op0 from max_cost to match the cost
             calculation of the synth_mult.  */
          max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET);
          if (choose_mult_variant (mode, coeff, &algorithm, &variant,
                                   max_cost))
            return expand_mult_const (mode, op0, coeff, target,
                                      &algorithm, variant);
        }
    }

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtx temp = op0;
      op0 = op1;
      op1 = temp;
    }

  /* Expand x*2.0 as x+x.  */
  if (GET_CODE (op1) == CONST_DOUBLE
      && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      REAL_VALUE_TYPE d;
      REAL_VALUE_FROM_CONST_DOUBLE (d, op1);

      if (REAL_VALUES_EQUAL (d, dconst2))
        {
          op0 = force_reg (GET_MODE (op0), op0);
          return expand_binop (mode, add_optab, op0, op0,
                               target, unsignedp, OPTAB_LIB_WIDEN);
        }
    }

  /* This used to use umul_optab if unsigned, but for non-widening multiply
     there is no difference between signed and unsigned.  */
  op0 = expand_binop (mode,
                      ! unsignedp
                      && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
                      ? smulv_optab : smul_optab,
                      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
  gcc_assert (op0);
  return op0;
}

/* Return the smallest n such that 2**n >= X.  */

int
ceil_log2 (unsigned HOST_WIDE_INT x)
{
  return floor_log2 (x - 1) + 1;
}

/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
   replace division by D, and put the least significant N bits of the result
   in *MULTIPLIER_PTR and return the most significant bit.

   The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
   needed precision is in PRECISION (should be <= N).

   PRECISION should be as small as possible so this function can choose
   the multiplier more freely.

   The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
   is to be used for a final right shift is placed in *POST_SHIFT_PTR.

   Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
   where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */

static
unsigned HOST_WIDE_INT
choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
                   rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
{
  HOST_WIDE_INT mhigh_hi, mlow_hi;
  unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
  int lgup, post_shift;
  int pow, pow2;
  unsigned HOST_WIDE_INT nl, dummy1;
  HOST_WIDE_INT nh, dummy2;

  /* lgup = ceil(log2(divisor)); */
  lgup = ceil_log2 (d);

  gcc_assert (lgup <= n);

  pow = n + lgup;
  pow2 = n + lgup - precision;

  /* We could handle this with some effort, but this case is much
     better handled directly with a scc insn, so rely on caller using
     that.  */
  gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);

  /* mlow = 2^(N + lgup)/d */
  if (pow >= HOST_BITS_PER_WIDE_INT)
    {
      nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
      nl = 0;
    }
  else
    {
      nh = 0;
      nl = (unsigned HOST_WIDE_INT) 1 << pow;
    }
  div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
                        &mlow_lo, &mlow_hi, &dummy1, &dummy2);

  /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
  if (pow2 >= HOST_BITS_PER_WIDE_INT)
    nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
  else
    nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
  div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
                        &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);

  gcc_assert (!mhigh_hi || nh - d < d);
  gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
  /* Assert that mlow < mhigh.  */
  gcc_assert (mlow_hi < mhigh_hi
              || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));

  /* If precision == N, then mlow, mhigh exceed 2^N
     (but they do not exceed 2^(N+1)).  */

  /* Reduce to lowest terms.  */
  for (post_shift = lgup; post_shift > 0; post_shift--)
    {
      unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
      unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
      if (ml_lo >= mh_lo)
        break;

      mlow_hi = 0;
      mlow_lo = ml_lo;
      mhigh_hi = 0;
      mhigh_lo = mh_lo;
    }

  *post_shift_ptr = post_shift;
  *lgup_ptr = lgup;
  if (n < HOST_BITS_PER_WIDE_INT)
    {
      unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
      *multiplier_ptr = GEN_INT (mhigh_lo & mask);
      return mhigh_lo >= mask;
    }
  else
    {
      *multiplier_ptr = GEN_INT (mhigh_lo);
      return mhigh_hi;
    }
}
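
/* Host-side sketch (illustrative only, not part of the original
   source; assumes a 32-bit unsigned int).  For d = 7 and N = 32,
   choose_multiplier picks m = (2^35 + 2^3)/7 = 0x124924925.  That
   needs 33 bits, so it returns a set most-significant bit plus the
   low 32 bits ml = 0x24924925, and expand_divmod repairs the missing
   top bit with the add-and-halve fixup shown here.  */
static unsigned int
udiv7_sketch (unsigned int x)
{
  unsigned int t1 = (unsigned long long) x * 0x24924925u >> 32;
  return (t1 + ((x - t1) >> 1)) >> 2;   /* total shift = post_shift = 3 */
}
/* e.g. udiv7_sketch (100) == 14 and udiv7_sketch (0xFFFFFFFF) == 613566756.  */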

/* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
   congruent to 1 (mod 2**N).  */

static unsigned HOST_WIDE_INT
invert_mod2n (unsigned HOST_WIDE_INT x, int n)
{
  /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */

  /* The algorithm notes that the choice y = x satisfies
     x*y == 1 mod 2^3, since x is assumed odd.
     Each iteration doubles the number of bits of significance in y.  */

  unsigned HOST_WIDE_INT mask;
  unsigned HOST_WIDE_INT y = x;
  int nbit = 3;

  mask = (n == HOST_BITS_PER_WIDE_INT
          ? ~(unsigned HOST_WIDE_INT) 0
          : ((unsigned HOST_WIDE_INT) 1 << n) - 1);

  while (nbit < n)
    {
      y = y * (2 - x*y) & mask;         /* Modulo 2^N */
      nbit *= 2;
    }
  return y;
}
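
/* Worked example (illustrative, not from the original source): for
   x = 3 and n = 8, the quadratically converging step y *= 2 - x*y
   takes y from 3 (exact mod 2^3) to 235 (exact mod 2^6) to 171,
   and indeed 3 * 171 = 513 == 1 (mod 256).  */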

/* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
   flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
   product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
   to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
   become signed.

   The result is put in TARGET if that is convenient.

   MODE is the mode of operation.  */

rtx
expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
                             rtx op1, rtx target, int unsignedp)
{
  rtx tem;
  enum rtx_code adj_code = unsignedp ? PLUS : MINUS;

  tem = expand_shift (RSHIFT_EXPR, mode, op0,
                      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
                      NULL_RTX, 0);
  tem = expand_and (mode, tem, op1, NULL_RTX);
  adj_operand
    = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
                     adj_operand);

  tem = expand_shift (RSHIFT_EXPR, mode, op1,
                      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
                      NULL_RTX, 0);
  tem = expand_and (mode, tem, op0, NULL_RTX);
  target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
                          target);

  return target;
}
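
/* The identity behind the adjustment (illustrative note, not from the
   original source): reading an N-bit value as signed rather than
   unsigned subtracts 2^N exactly when its sign bit is set, so, modulo
   2^N,

     high_signed (a, b) = high_unsigned (a, b)
                          - (a < 0 ? b : 0) - (b < 0 ? a : 0)

   and the reverse direction adds the same two terms.  The arithmetic
   shifts by N - 1 above build the all-ones masks that stand in for
   the two sign tests.  */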

/* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */

static rtx
extract_high_half (enum machine_mode mode, rtx op)
{
  enum machine_mode wider_mode;

  if (mode == word_mode)
    return gen_highpart (mode, op);

  wider_mode = GET_MODE_WIDER_MODE (mode);
  op = expand_shift (RSHIFT_EXPR, wider_mode, op,
                     build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
  return convert_modes (mode, wider_mode, op, 0);
}

/* Like expand_mult_highpart, but only consider using a multiplication
   optab.  OP1 is an rtx for the constant operand.  */

static rtx
expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
                            rtx target, int unsignedp, int max_cost)
{
  rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
  enum machine_mode wider_mode;
  optab moptab;
  rtx tem;
  int size;

  wider_mode = GET_MODE_WIDER_MODE (mode);
  size = GET_MODE_BITSIZE (mode);

  /* Firstly, try using a multiplication insn that only generates the needed
     high part of the product, and in the sign flavor of unsignedp.  */
  if (mul_highpart_cost[mode] < max_cost)
    {
      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
                          unsignedp, OPTAB_DIRECT);
      if (tem)
        return tem;
    }

  /* Secondly, same as above, but use the sign flavor opposite of unsignedp.
     Need to adjust the result after the multiplication.  */
  if (size - 1 < BITS_PER_WORD
      && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
          + 4 * add_cost[mode] < max_cost))
    {
      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
                          unsignedp, OPTAB_DIRECT);
      if (tem)
        /* We used the wrong signedness.  Adjust the result.  */
        return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
                                            tem, unsignedp);
    }

  /* Try widening multiplication.  */
  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
  if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
      && mul_widen_cost[wider_mode] < max_cost)
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
                          unsignedp, OPTAB_WIDEN);
      if (tem)
        return extract_high_half (mode, tem);
    }

  /* Try widening the mode and performing a non-widening multiplication.  */
  if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
    {
      rtx insns, wop0, wop1;

      /* We need to widen the operands, for example to ensure the
         constant multiplier is correctly sign or zero extended.
         Use a sequence to clean up any instructions emitted by
         the conversions if things don't work out.  */
      start_sequence ();
      wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
      wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
      tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
                          unsignedp, OPTAB_WIDEN);
      insns = get_insns ();
      end_sequence ();

      if (tem)
        {
          emit_insn (insns);
          return extract_high_half (mode, tem);
        }
    }

  /* Try widening multiplication of opposite signedness, and adjust.  */
  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
  if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
          + 4 * add_cost[mode] < max_cost))
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
                          NULL_RTX, ! unsignedp, OPTAB_WIDEN);
      if (tem != 0)
        {
          tem = extract_high_half (mode, tem);
          /* We used the wrong signedness.  Adjust the result.  */
          return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
                                              target, unsignedp);
        }
    }

  return 0;
}

/* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
   putting the high half of the result in TARGET if that is convenient,
   and return where the result is.  If the operation cannot be performed,
   return 0.

   MODE is the mode of operation and result.

   UNSIGNEDP nonzero means unsigned multiply.

   MAX_COST is the total allowed cost for the expanded RTL.  */

static rtx
expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
                      rtx target, int unsignedp, int max_cost)
{
  enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
  unsigned HOST_WIDE_INT cnst1;
  int extra_cost;
  bool sign_adjust = false;
  enum mult_variant variant;
  struct algorithm alg;
  rtx tem;

  /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
  gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);

  cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);

  /* We can't optimize modes wider than BITS_PER_WORD.
     ??? We might be able to perform double-word arithmetic if
     mode == word_mode, however all the cost calculations in
     synth_mult etc. assume single-word operations.  */
  if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
    return expand_mult_highpart_optab (mode, op0, op1, target,
                                       unsignedp, max_cost);

  extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];

  /* Check whether we try to multiply by a negative constant.  */
  if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
    {
      sign_adjust = true;
      extra_cost += add_cost[mode];
    }

  /* See whether shift/add multiplication is cheap enough.  */
  if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
                           max_cost - extra_cost))
    {
      /* See whether the specialized multiplication optabs are
         cheaper than the shift/add version.  */
      tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
                                        alg.cost.cost + extra_cost);
      if (tem)
        return tem;

      tem = convert_to_mode (wider_mode, op0, unsignedp);
      tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
      tem = extract_high_half (mode, tem);

      /* Adjust result for signedness.  */
      if (sign_adjust)
        tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);

      return tem;
    }
  return expand_mult_highpart_optab (mode, op0, op1, target,
                                     unsignedp, max_cost);
}


/* Expand signed modulus of OP0 by a power of two D in mode MODE.  */

static rtx
expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
{
  unsigned HOST_WIDE_INT masklow, maskhigh;
  rtx result, temp, shift, label;
  int logd;

  logd = floor_log2 (d);
  result = gen_reg_rtx (mode);

  /* Avoid conditional branches when they're expensive.  */
  if (BRANCH_COST >= 2
      && !optimize_size)
    {
      rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
                                      mode, 0, -1);
      if (signmask)
        {
          signmask = force_reg (mode, signmask);
          masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
          shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);

          /* Use the rtx_cost of a LSHIFTRT instruction to determine
             which instruction sequence to use.  If logical right shifts
             are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
             use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */

          temp = gen_rtx_LSHIFTRT (mode, result, shift);
          if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
              || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
            {
              temp = expand_binop (mode, xor_optab, op0, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, sub_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, xor_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, sub_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
            }
          else
            {
              signmask = expand_binop (mode, lshr_optab, signmask, shift,
                                       NULL_RTX, 1, OPTAB_LIB_WIDEN);
              signmask = force_reg (mode, signmask);

              temp = expand_binop (mode, add_optab, op0, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, sub_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
            }
          return temp;
        }
    }

  /* Mask contains the mode's signbit and the significant bits of the
     modulus.  By including the signbit in the operation, many targets
     can avoid an explicit compare operation in the following comparison
     against zero.  */

  masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
  if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
    {
      masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
      maskhigh = -1;
    }
  else
    maskhigh = (HOST_WIDE_INT) -1
                 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);

  temp = expand_binop (mode, and_optab, op0,
                       immed_double_const (masklow, maskhigh, mode),
                       result, 1, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);

  label = gen_label_rtx ();
  do_cmp_and_jump (result, const0_rtx, GE, mode, label);

  temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
                       0, OPTAB_LIB_WIDEN);
  masklow = (HOST_WIDE_INT) -1 << logd;
  maskhigh = -1;
  temp = expand_binop (mode, ior_optab, temp,
                       immed_double_const (masklow, maskhigh, mode),
                       result, 1, OPTAB_LIB_WIDEN);
  temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
                       0, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);
  emit_label (label);
  return result;
}
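
/* Host-side sketch of the branch-free LSHIFTRT/ADD/AND/SUB sequence
   emitted above (illustrative only, not part of the original source;
   assumes a 32-bit two's complement int and logd >= 1).  signmask
   corresponds to the emit_store_flag result: all ones when op0 is
   negative.  E.g. smod_pow2_sketch (-5, 2) == -1, matching the
   truncated -5 % 4.  */
static int
smod_pow2_sketch (int x, int logd)
{
  unsigned int mask = ((unsigned int) 1 << logd) - 1;
  unsigned int signmask = (unsigned int) (x >> 31);   /* 0 or ~0 */
  unsigned int bias = signmask >> (32 - logd);        /* 0 or d - 1 */

  return (int) ((((unsigned int) x + bias) & mask) - bias);
}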

/* Expand signed division of OP0 by a power of two D in mode MODE.
   This routine is only called for positive values of D.  */

static rtx
expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx temp, label;
  tree shift;
  int logd;

  logd = floor_log2 (d);
  shift = build_int_cst (NULL_TREE, logd);

  if (d == 2 && BRANCH_COST >= 1)
    {
      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
                           0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
    }

#ifdef HAVE_conditional_move
  if (BRANCH_COST >= 2)
    {
      rtx temp2;

      /* ??? emit_conditional_move forces a stack adjustment via
         compare_from_rtx, so if the sequence is discarded it will
         be lost.  Do it now instead.  */
      do_pending_stack_adjust ();

      start_sequence ();
      temp2 = copy_to_mode_reg (mode, op0);
      temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
                           NULL_RTX, 0, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);

      /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
      temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
                                     mode, temp, temp2, mode, 0);
      if (temp2)
        {
          rtx seq = get_insns ();
          end_sequence ();
          emit_insn (seq);
          return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
        }
      end_sequence ();
    }
#endif

  if (BRANCH_COST >= 2)
    {
      int ushift = GET_MODE_BITSIZE (mode) - logd;

      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
      if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
        temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
                             NULL_RTX, 0, OPTAB_LIB_WIDEN);
      else
        temp = expand_shift (RSHIFT_EXPR, mode, temp,
                             build_int_cst (NULL_TREE, ushift),
                             NULL_RTX, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
                           0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
    }

  label = gen_label_rtx ();
  temp = copy_to_mode_reg (mode, op0);
  do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
  expand_inc (temp, GEN_INT (d - 1));
  emit_label (label);
  return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
}
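
/* Host-side sketch of the branch-free path above (illustrative only,
   not part of the original source; assumes a 32-bit int with
   arithmetic right shift of negative values, as the RTL sequence
   does): add d - 1 to negative operands so the arithmetic shift
   rounds toward zero, e.g. (-5 + 3) >> 2 == -1 == -5 / 4.  */
static int
sdiv_pow2_sketch (int x, int logd)
{
  int bias = (int) ((unsigned int) (x >> 31) >> (32 - logd));
  return (x + bias) >> logd;
}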

/* Emit the code to divide OP0 by OP1, putting the result in TARGET
   if that is convenient, and returning where the result is.
   You may request either the quotient or the remainder as the result;
   specify REM_FLAG nonzero to get the remainder.

   CODE is the expression code for which kind of division this is;
   it controls how rounding is done.  MODE is the machine mode to use.
   UNSIGNEDP nonzero means do unsigned division.  */

/* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
   and then correct it by or'ing in missing high bits
   if result of ANDI is nonzero.
   For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
   This could optimize to a bfexts instruction.
   But C doesn't use these operations, so their optimizations are
   left for later.  */
/* ??? For modulo, we don't actually need the highpart of the first product;
   the low part will do nicely.  And for small divisors, the second multiply
   can also be a low-part only multiply or even be completely left out.
   E.g. to calculate the remainder of a division by 3 with a 32 bit
   multiply, multiply with 0x55555556 and extract the upper two bits;
   the result is exact for inputs up to 0x1fffffff.
   The input range can be reduced by using cross-sum rules.
   For odd divisors >= 3, the following table gives right shift counts
   so that if a number is shifted by an integer multiple of the given
   amount, the remainder stays the same:
   2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
   14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
   0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
   20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
   0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12

   Cross-sum rules for even numbers can be derived by leaving as many bits
   to the right alone as the divisor has zeros to the right.
   E.g. if x is an unsigned 32 bit number:
   (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
   */
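
/* Host-side check of the 0x55555556 example above (illustrative only,
   not part of the original source; assumes a 32-bit unsigned int).
   The low product's top two bits hold x mod 3 for x <= 0x1fffffff,
   e.g. x = 5: 5 * 0x55555556 mod 2^32 = 0xAAAAAAAE, and
   0xAAAAAAAE >> 30 == 2.  */
static unsigned int
rem3_sketch (unsigned int x)
{
  return (x * 0x55555556u) >> 30;
}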
3793
 
3794
rtx
3795
expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3796
               rtx op0, rtx op1, rtx target, int unsignedp)
3797
{
3798
  enum machine_mode compute_mode;
3799
  rtx tquotient;
3800
  rtx quotient = 0, remainder = 0;
3801
  rtx last;
3802
  int size;
3803
  rtx insn, set;
3804
  optab optab1, optab2;
3805
  int op1_is_constant, op1_is_pow2 = 0;
3806
  int max_cost, extra_cost;
3807
  static HOST_WIDE_INT last_div_const = 0;
3808
  static HOST_WIDE_INT ext_op1;
3809
 
3810
  op1_is_constant = GET_CODE (op1) == CONST_INT;
3811
  if (op1_is_constant)
3812
    {
3813
      ext_op1 = INTVAL (op1);
3814
      if (unsignedp)
3815
        ext_op1 &= GET_MODE_MASK (mode);
3816
      op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3817
                     || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3818
    }
3819
 
3820
  /*
3821
     This is the structure of expand_divmod:
3822
 
3823
     First comes code to fix up the operands so we can perform the operations
3824
     correctly and efficiently.
3825
 
3826
     Second comes a switch statement with code specific for each rounding mode.
3827
     For some special operands this code emits all RTL for the desired
3828
     operation, for other cases, it generates only a quotient and stores it in
3829
     QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3830
     to indicate that it has not done anything.
3831
 
3832
     Last comes code that finishes the operation.  If QUOTIENT is set and
3833
     REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3834
     QUOTIENT is not set, it is computed using trunc rounding.
3835
 
3836
     We try to generate special code for division and remainder when OP1 is a
3837
     constant.  If |OP1| = 2**n we can use shifts and some other fast
3838
     operations.  For other values of OP1, we compute a carefully selected
3839
     fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3840
     by m.
3841
 
3842
     In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3843
     half of the product.  Different strategies for generating the product are
3844
     implemented in expand_mult_highpart.
3845
 
3846
     If what we actually want is the remainder, we generate that by another
3847
     by-constant multiplication and a subtraction.  */
3848
 
3849
  /* We shouldn't be called with OP1 == const1_rtx, but some of the
3850
     code below will malfunction if we are, so check here and handle
3851
     the special case if so.  */
3852
  if (op1 == const1_rtx)
3853
    return rem_flag ? const0_rtx : op0;
3854
 
3855
    /* When dividing by -1, we could get an overflow.
3856
     negv_optab can handle overflows.  */
3857
  if (! unsignedp && op1 == constm1_rtx)
3858
    {
3859
      if (rem_flag)
3860
        return const0_rtx;
3861
      return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3862
                          ? negv_optab : neg_optab, op0, target, 0);
3863
    }
3864
 
3865
  if (target
3866
      /* Don't use the function value register as a target
3867
         since we have to read it as well as write it,
3868
         and function-inlining gets confused by this.  */
3869
      && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3870
          /* Don't clobber an operand while doing a multi-step calculation.  */
3871
          || ((rem_flag || op1_is_constant)
3872
              && (reg_mentioned_p (target, op0)
3873
                  || (MEM_P (op0) && MEM_P (target))))
3874
          || reg_mentioned_p (target, op1)
3875
          || (MEM_P (op1) && MEM_P (target))))
3876
    target = 0;
3877
 
3878
  /* Get the mode in which to perform this computation.  Normally it will
3879
     be MODE, but sometimes we can't do the desired operation in MODE.
3880
     If so, pick a wider mode in which we can do the operation.  Convert
3881
     to that mode at the start to avoid repeated conversions.
3882
 
3883
     First see what operations we need.  These depend on the expression
3884
     we are evaluating.  (We assume that divxx3 insns exist under the
3885
     same conditions that modxx3 insns and that these insns don't normally
3886
     fail.  If these assumptions are not correct, we may generate less
3887
     efficient code in some cases.)
3888
 
3889
     Then see if we find a mode in which we can open-code that operation
3890
     (either a division, modulus, or shift).  Finally, check for the smallest
3891
     mode for which we can do the operation with a library call.  */
3892
 
3893
  /* We might want to refine this now that we have division-by-constant
3894
     optimization.  Since expand_mult_highpart tries so many variants, it is
3895
     not straightforward to generalize this.  Maybe we should make an array
3896
     of possible modes in init_expmed?  Save this for GCC 2.7.  */
3897
 
3898
  optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3899
            ? (unsignedp ? lshr_optab : ashr_optab)
3900
            : (unsignedp ? udiv_optab : sdiv_optab));
3901
  optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3902
            ? optab1
3903
            : (unsignedp ? udivmod_optab : sdivmod_optab));
3904
 
3905
  for (compute_mode = mode; compute_mode != VOIDmode;
3906
       compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3907
    if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3908
        || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3909
      break;
3910
 
3911
  if (compute_mode == VOIDmode)
3912
    for (compute_mode = mode; compute_mode != VOIDmode;
3913
         compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3914
      if (optab1->handlers[compute_mode].libfunc
3915
          || optab2->handlers[compute_mode].libfunc)
3916
        break;
3917
 
3918
  /* If we still couldn't find a mode, use MODE, but expand_binop will
3919
     probably die.  */
3920
  if (compute_mode == VOIDmode)
3921
    compute_mode = mode;
3922
 
3923
  if (target && GET_MODE (target) == compute_mode)
3924
    tquotient = target;
3925
  else
3926
    tquotient = gen_reg_rtx (compute_mode);
3927
 
3928
  size = GET_MODE_BITSIZE (compute_mode);
3929
#if 0
3930
  /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3931
     (mode), and thereby get better code when OP1 is a constant.  Do that
3932
     later.  It will require going over all usages of SIZE below.  */
3933
  size = GET_MODE_BITSIZE (mode);
3934
#endif
3935
 
3936
  /* Only deduct something for a REM if the last divide done was
3937
     for a different constant.   Then set the constant of the last
3938
     divide.  */
3939
  max_cost = div_cost[compute_mode]
3940
    - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3941
                      && INTVAL (op1) == last_div_const)
3942
       ? mul_cost[compute_mode] + add_cost[compute_mode]
3943
       : 0);
3944
 
3945
  last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3946
 
3947
  /* Now convert to the best mode to use.  */
3948
  if (compute_mode != mode)
3949
    {
3950
      op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3951
      op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3952
 
3953
      /* convert_modes may have placed op1 into a register, so we
3954
         must recompute the following.  */
3955
      op1_is_constant = GET_CODE (op1) == CONST_INT;
3956
      op1_is_pow2 = (op1_is_constant
3957
                     && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3958
                          || (! unsignedp
3959
                              && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3960
    }
3961
 
3962
  /* If one of the operands is a volatile MEM, copy it into a register.  */
3963
 
3964
  if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3965
    op0 = force_reg (compute_mode, op0);
3966
  if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3967
    op1 = force_reg (compute_mode, op1);
3968
 
3969
  /* If we need the remainder or if OP1 is constant, we need to
3970
     put OP0 in a register in case it has any queued subexpressions.  */
3971
  if (rem_flag || op1_is_constant)
3972
    op0 = force_reg (compute_mode, op0);
3973
 
3974
  last = get_last_insn ();
3975
 
3976
  /* Promote floor rounding to trunc rounding for unsigned operations.  */
3977
  if (unsignedp)
3978
    {
3979
      if (code == FLOOR_DIV_EXPR)
3980
        code = TRUNC_DIV_EXPR;
3981
      if (code == FLOOR_MOD_EXPR)
3982
        code = TRUNC_MOD_EXPR;
3983
      if (code == EXACT_DIV_EXPR && op1_is_pow2)
3984
        code = TRUNC_DIV_EXPR;
3985
    }
3986
 
3987
  if (op1 != const0_rtx)
3988
    switch (code)
3989
      {
3990
      case TRUNC_MOD_EXPR:
3991
      case TRUNC_DIV_EXPR:
3992
        if (op1_is_constant)
3993
          {
3994
            if (unsignedp)
3995
              {
3996
                unsigned HOST_WIDE_INT mh;
3997
                int pre_shift, post_shift;
3998
                int dummy;
3999
                rtx ml;
4000
                unsigned HOST_WIDE_INT d = (INTVAL (op1)
4001
                                            & GET_MODE_MASK (compute_mode));
4002
 
4003
                if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4004
                  {
4005
                    pre_shift = floor_log2 (d);
4006
                    if (rem_flag)
4007
                      {
4008
                        remainder
4009
                          = expand_binop (compute_mode, and_optab, op0,
4010
                                          GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4011
                                          remainder, 1,
4012
                                          OPTAB_LIB_WIDEN);
4013
                        if (remainder)
4014
                          return gen_lowpart (mode, remainder);
4015
                      }
4016
                    quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4017
                                             build_int_cst (NULL_TREE,
4018
                                                            pre_shift),
4019
                                             tquotient, 1);
4020
                  }
4021
                else if (size <= HOST_BITS_PER_WIDE_INT)
4022
                  {
4023
                    if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4024
                      {
4025
                        /* Most significant bit of divisor is set; emit an scc
4026
                           insn.  */
4027
                        quotient = emit_store_flag (tquotient, GEU, op0, op1,
4028
                                                    compute_mode, 1, 1);
4029
                        if (quotient == 0)
4030
                          goto fail1;
4031
                      }
                    else
                      {
                        /* Find a suitable multiplier and right shift count
                           instead of multiplying with D.  */

                        mh = choose_multiplier (d, size, size,
                                                &ml, &post_shift, &dummy);

                        /* If the suggested multiplier is more than SIZE bits,
                           we can do better for even divisors, using an
                           initial right shift.  */
                        if (mh != 0 && (d & 1) == 0)
                          {
                            pre_shift = floor_log2 (d & -d);
                            mh = choose_multiplier (d >> pre_shift, size,
                                                    size - pre_shift,
                                                    &ml, &post_shift, &dummy);
                            gcc_assert (!mh);
                          }
                        else
                          pre_shift = 0;

                        if (mh != 0)
                          {
                            rtx t1, t2, t3, t4;

                            if (post_shift - 1 >= BITS_PER_WORD)
                              goto fail1;

                            extra_cost
                              = (shift_cost[compute_mode][post_shift - 1]
                                 + shift_cost[compute_mode][1]
                                 + 2 * add_cost[compute_mode]);
                            t1 = expand_mult_highpart (compute_mode, op0, ml,
                                                       NULL_RTX, 1,
                                                       max_cost - extra_cost);
                            if (t1 == 0)
                              goto fail1;
                            t2 = force_operand (gen_rtx_MINUS (compute_mode,
                                                               op0, t1),
                                                NULL_RTX);
                            t3 = expand_shift
                              (RSHIFT_EXPR, compute_mode, t2,
                               build_int_cst (NULL_TREE, 1),
                               NULL_RTX, 1);
                            t4 = force_operand (gen_rtx_PLUS (compute_mode,
                                                              t1, t3),
                                                NULL_RTX);
                            quotient = expand_shift
                              (RSHIFT_EXPR, compute_mode, t4,
                               build_int_cst (NULL_TREE, post_shift - 1),
                               tquotient, 1);
                          }
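                            /* Concretely, for d == 7 in a 32-bit mode
                               choose_multiplier yields ml == 0x24924925
                               and post_shift == 3, and T1..T4 compute the
                               equivalent of this C sketch (illustrative
                               only; uint64_t stands in for the widened
                               product):

                                 uint32_t t1 = ((uint64_t) n * 0x24924925u) >> 32;
                                 uint32_t q  = (t1 + ((n - t1) >> 1)) >> 2;

                               which equals n / 7 for every 32-bit n.  */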
                        else
                          {
                            rtx t1, t2;

                            if (pre_shift >= BITS_PER_WORD
                                || post_shift >= BITS_PER_WORD)
                              goto fail1;

                            t1 = expand_shift
                              (RSHIFT_EXPR, compute_mode, op0,
                               build_int_cst (NULL_TREE, pre_shift),
                               NULL_RTX, 1);
                            extra_cost
                              = (shift_cost[compute_mode][pre_shift]
                                 + shift_cost[compute_mode][post_shift]);
                            t2 = expand_mult_highpart (compute_mode, t1, ml,
                                                       NULL_RTX, 1,
                                                       max_cost - extra_cost);
                            if (t2 == 0)
                              goto fail1;
                            quotient = expand_shift
                              (RSHIFT_EXPR, compute_mode, t2,
                               build_int_cst (NULL_TREE, post_shift),
                               tquotient, 1);
                          }
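                            /* Concretely, for d == 3 in a 32-bit mode this
                               path gets ml == 0xAAAAAAAB with pre_shift == 0
                               and post_shift == 1, i.e. roughly
                               (illustrative C sketch):

                                 uint32_t q
                                   = (uint32_t) (((uint64_t) n * 0xAAAAAAABu) >> 32) >> 1;

                               while an even divisor such as d == 6 first
                               shifts: pre_shift == 1, ml == 0x55555556,
                               post_shift == 0.  */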
                      }
                  }
                else            /* Too wide mode to use tricky code */
                  break;

                insn = get_last_insn ();
                if (insn != last
                    && (set = single_set (insn)) != 0
                    && SET_DEST (set) == quotient)
                  set_unique_reg_note (insn,
                                       REG_EQUAL,
                                       gen_rtx_UDIV (compute_mode, op0, op1));
              }
            else                /* TRUNC_DIV, signed */
              {
                unsigned HOST_WIDE_INT ml;
                int lgup, post_shift;
                rtx mlr;
                HOST_WIDE_INT d = INTVAL (op1);
                unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;

                /* n rem d = n rem -d */
                if (rem_flag && d < 0)
                  {
                    d = abs_d;
                    op1 = gen_int_mode (abs_d, compute_mode);
                  }

                if (d == 1)
                  quotient = op0;
                else if (d == -1)
                  quotient = expand_unop (compute_mode, neg_optab, op0,
                                          tquotient, 0);
                else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
                  {
                    /* This case is not handled correctly below.  */
                    quotient = emit_store_flag (tquotient, EQ, op0, op1,
                                                compute_mode, 1, 1);
                    if (quotient == 0)
                      goto fail1;
                  }
                else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
                         && (rem_flag ? smod_pow2_cheap[compute_mode]
                                      : sdiv_pow2_cheap[compute_mode])
                         /* We assume that cheap metric is true if the
                            optab has an expander for this mode.  */
                         && (((rem_flag ? smod_optab : sdiv_optab)
                              ->handlers[compute_mode].insn_code
                              != CODE_FOR_nothing)
                             || (sdivmod_optab->handlers[compute_mode]
                                 .insn_code != CODE_FOR_nothing)))
                  ;
                else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
                  {
                    if (rem_flag)
                      {
                        remainder = expand_smod_pow2 (compute_mode, op0, d);
                        if (remainder)
                          return gen_lowpart (mode, remainder);
                      }

                    if (sdiv_pow2_cheap[compute_mode]
                        && ((sdiv_optab->handlers[compute_mode].insn_code
                             != CODE_FOR_nothing)
                            || (sdivmod_optab->handlers[compute_mode].insn_code
                                != CODE_FOR_nothing)))
                      quotient = expand_divmod (0, TRUNC_DIV_EXPR,
                                                compute_mode, op0,
                                                gen_int_mode (abs_d,
                                                              compute_mode),
                                                NULL_RTX, 0);
                    else
                      quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);

                    /* We have computed OP0 / abs(OP1).  If OP1 is negative,
                       negate the quotient.  */
                    if (d < 0)
                      {
                        insn = get_last_insn ();
                        if (insn != last
                            && (set = single_set (insn)) != 0
                            && SET_DEST (set) == quotient
                            && abs_d < ((unsigned HOST_WIDE_INT) 1
                                        << (HOST_BITS_PER_WIDE_INT - 1)))
                          set_unique_reg_note (insn,
                                               REG_EQUAL,
                                               gen_rtx_DIV (compute_mode,
                                                            op0,
                                                            GEN_INT
                                                            (trunc_int_for_mode
                                                             (abs_d,
                                                              compute_mode))));

                        quotient = expand_unop (compute_mode, neg_optab,
                                                quotient, quotient, 0);
                      }
                  }
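                /* For a positive power of two the helpers above emit the
                   standard branch-free sequence; e.g. n / 4 in a 32-bit
                   mode is roughly this C sketch (illustrative only, not
                   necessarily the exact sequence expand_sdiv_pow2
                   chooses):

                     int32_t q = (n + ((n >> 31) & 3)) >> 2;

                   The bias ((n >> 31) & 3) makes the arithmetic shift
                   round toward zero for negative N.  */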
                else if (size <= HOST_BITS_PER_WIDE_INT)
                  {
                    choose_multiplier (abs_d, size, size - 1,
                                       &mlr, &post_shift, &lgup);
                    ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
                    if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
                      {
                        rtx t1, t2, t3;

                        if (post_shift >= BITS_PER_WORD
                            || size - 1 >= BITS_PER_WORD)
                          goto fail1;

                        extra_cost = (shift_cost[compute_mode][post_shift]
                                      + shift_cost[compute_mode][size - 1]
                                      + add_cost[compute_mode]);
                        t1 = expand_mult_highpart (compute_mode, op0, mlr,
                                                   NULL_RTX, 0,
                                                   max_cost - extra_cost);
                        if (t1 == 0)
                          goto fail1;
                        t2 = expand_shift
                          (RSHIFT_EXPR, compute_mode, t1,
                           build_int_cst (NULL_TREE, post_shift),
                           NULL_RTX, 0);
                        t3 = expand_shift
                          (RSHIFT_EXPR, compute_mode, op0,
                           build_int_cst (NULL_TREE, size - 1),
                           NULL_RTX, 0);
                        if (d < 0)
                          quotient
                            = force_operand (gen_rtx_MINUS (compute_mode,
                                                            t3, t2),
                                             tquotient);
                        else
                          quotient
                            = force_operand (gen_rtx_MINUS (compute_mode,
                                                            t2, t3),
                                             tquotient);
                      }
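                    /* Concretely, for d == 3 in a 32-bit mode
                       choose_multiplier yields ml == 0x55555556 with
                       post_shift == 0, so T1..T3 compute roughly
                       (illustrative C sketch; the first line is the
                       signed high-part multiply):

                         int32_t t1 = ((int64_t) n * 0x55555556) >> 32;
                         int32_t q  = t1 - (n >> 31);

                       which is n / 3, truncating, for all 32-bit n.  */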
                    else
                      {
                        rtx t1, t2, t3, t4;

                        if (post_shift >= BITS_PER_WORD
                            || size - 1 >= BITS_PER_WORD)
                          goto fail1;

                        ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
                        mlr = gen_int_mode (ml, compute_mode);
                        extra_cost = (shift_cost[compute_mode][post_shift]
                                      + shift_cost[compute_mode][size - 1]
                                      + 2 * add_cost[compute_mode]);
                        t1 = expand_mult_highpart (compute_mode, op0, mlr,
                                                   NULL_RTX, 0,
                                                   max_cost - extra_cost);
                        if (t1 == 0)
                          goto fail1;
                        t2 = force_operand (gen_rtx_PLUS (compute_mode,
                                                          t1, op0),
                                            NULL_RTX);
                        t3 = expand_shift
                          (RSHIFT_EXPR, compute_mode, t2,
                           build_int_cst (NULL_TREE, post_shift),
                           NULL_RTX, 0);
                        t4 = expand_shift
                          (RSHIFT_EXPR, compute_mode, op0,
                           build_int_cst (NULL_TREE, size - 1),
                           NULL_RTX, 0);
                        if (d < 0)
                          quotient
                            = force_operand (gen_rtx_MINUS (compute_mode,
                                                            t4, t3),
                                             tquotient);
                        else
                          quotient
                            = force_operand (gen_rtx_MINUS (compute_mode,
                                                            t3, t4),
                                             tquotient);
                      }
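                    /* Concretely, for d == 7 in a 32-bit mode ml becomes
                       0x92492493 after the sign-extension above, with
                       post_shift == 2, so T1..T4 compute roughly
                       (illustrative C sketch):

                         int32_t t1 = ((int64_t) n * (int32_t) 0x92492493) >> 32;
                         int32_t q  = ((t1 + n) >> 2) - (n >> 31);

                       which is n / 7, truncating, for all 32-bit n.  */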
                  }
                else            /* Too wide mode to use tricky code */
                  break;

                insn = get_last_insn ();
                if (insn != last
                    && (set = single_set (insn)) != 0
                    && SET_DEST (set) == quotient)
                  set_unique_reg_note (insn,
                                       REG_EQUAL,
                                       gen_rtx_DIV (compute_mode, op0, op1));
              }
            break;
          }
      fail1:
        delete_insns_since (last);
        break;

      case FLOOR_DIV_EXPR:
      case FLOOR_MOD_EXPR:
        /* We will come here only for signed operations.  */
        if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
          {
            unsigned HOST_WIDE_INT mh;
            int pre_shift, lgup, post_shift;
            HOST_WIDE_INT d = INTVAL (op1);
            rtx ml;

            if (d > 0)
              {
                /* We could just as easily deal with negative constants here,
                   but it does not seem worth the trouble for GCC 2.6.  */
                if (EXACT_POWER_OF_2_OR_ZERO_P (d))
                  {
                    pre_shift = floor_log2 (d);
                    if (rem_flag)
                      {
                        remainder = expand_binop (compute_mode, and_optab, op0,
                                                  GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
                                                  remainder, 0, OPTAB_LIB_WIDEN);
                        if (remainder)
                          return gen_lowpart (mode, remainder);
                      }
                    quotient = expand_shift
                      (RSHIFT_EXPR, compute_mode, op0,
                       build_int_cst (NULL_TREE, pre_shift),
                       tquotient, 0);
                  }
                else
                  {
                    rtx t1, t2, t3, t4;

                    mh = choose_multiplier (d, size, size - 1,
                                            &ml, &post_shift, &lgup);
                    gcc_assert (!mh);

                    if (post_shift < BITS_PER_WORD
                        && size - 1 < BITS_PER_WORD)
                      {
                        t1 = expand_shift
                          (RSHIFT_EXPR, compute_mode, op0,
                           build_int_cst (NULL_TREE, size - 1),
                           NULL_RTX, 0);
                        t2 = expand_binop (compute_mode, xor_optab, op0, t1,
                                           NULL_RTX, 0, OPTAB_WIDEN);
                        extra_cost = (shift_cost[compute_mode][post_shift]
                                      + shift_cost[compute_mode][size - 1]
                                      + 2 * add_cost[compute_mode]);
                        t3 = expand_mult_highpart (compute_mode, t2, ml,
                                                   NULL_RTX, 1,
                                                   max_cost - extra_cost);
                        if (t3 != 0)
                          {
                            t4 = expand_shift
                              (RSHIFT_EXPR, compute_mode, t3,
                               build_int_cst (NULL_TREE, post_shift),
                               NULL_RTX, 1);
                            quotient = expand_binop (compute_mode, xor_optab,
                                                     t4, t1, tquotient, 0,
                                                     OPTAB_WIDEN);
                          }
                      }
                  }
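                /* The XOR trick relies on floor(n/d) == ~floor(~n / d)
                   for d > 0 and n < 0: T1 is the sign mask, so OP0 ^ T1
                   is n itself for n >= 0 and ~n (= -n - 1, nonnegative)
                   for n < 0; the unsigned multiply-high rounds that down
                   and the final XOR undoes the complement.  For d == 3
                   in a 32-bit mode (ml == 0x55555556, post_shift == 0)
                   this is roughly (illustrative C sketch):

                     int32_t  s = n >> 31;
                     uint32_t u = (uint32_t) (n ^ s);
                     uint32_t t = ((uint64_t) u * 0x55555556u) >> 32;
                     int32_t  q = (int32_t) t ^ s;  */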
              }
            else
              {
                rtx nsign, t1, t2, t3, t4;
                t1 = force_operand (gen_rtx_PLUS (compute_mode,
                                                  op0, constm1_rtx), NULL_RTX);
                t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
                                   0, OPTAB_WIDEN);
                nsign = expand_shift
                  (RSHIFT_EXPR, compute_mode, t2,
                   build_int_cst (NULL_TREE, size - 1),
                   NULL_RTX, 0);
                t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
                                    NULL_RTX);
                t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
                                    NULL_RTX, 0);
                if (t4)
                  {
                    rtx t5;
                    t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
                                      NULL_RTX, 0);
                    quotient = force_operand (gen_rtx_PLUS (compute_mode,
                                                            t4, t5),
                                              tquotient);
                  }
              }
          }

        if (quotient != 0)
          break;
        delete_insns_since (last);

        /* Try using an instruction that produces both the quotient and
           remainder, using truncation.  We can easily compensate the quotient
           or remainder to get floor rounding, once we have the remainder.
           Notice that we compute also the final remainder value here,
           and return the result right away.  */
        if (target == 0 || GET_MODE (target) != compute_mode)
          target = gen_reg_rtx (compute_mode);

        if (rem_flag)
          {
            remainder
              = REG_P (target) ? target : gen_reg_rtx (compute_mode);
            quotient = gen_reg_rtx (compute_mode);
          }
        else
          {
            quotient
              = REG_P (target) ? target : gen_reg_rtx (compute_mode);
            remainder = gen_reg_rtx (compute_mode);
          }

        if (expand_twoval_binop (sdivmod_optab, op0, op1,
                                 quotient, remainder, 0))
          {
            /* This could be computed with a branch-less sequence.
               Save that for later.  */
            rtx tem;
            rtx label = gen_label_rtx ();
            do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
            tem = expand_binop (compute_mode, xor_optab, op0, op1,
                                NULL_RTX, 0, OPTAB_WIDEN);
            do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
            expand_dec (quotient, const1_rtx);
            expand_inc (remainder, op1);
            emit_label (label);
            return gen_lowpart (mode, rem_flag ? remainder : quotient);
          }
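        /* This is the usual fixup from truncating to floor division:
           with Q and R from the truncating divmod, in C terms
           (illustrative sketch):

             if (r != 0 && (n ^ d) < 0)
               {
                 q -= 1;
                 r += d;
               }

           The two compare-and-jumps above test exactly r == 0 and
           sign(n) == sign(d) to skip the adjustment.  */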
        /* No luck with division elimination or divmod.  Have to do it
           by conditionally adjusting op0 *and* the result.  */
        {
          rtx label1, label2, label3, label4, label5;
          rtx adjusted_op0;
          rtx tem;

          quotient = gen_reg_rtx (compute_mode);
          adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
          label1 = gen_label_rtx ();
          label2 = gen_label_rtx ();
          label3 = gen_label_rtx ();
          label4 = gen_label_rtx ();
          label5 = gen_label_rtx ();
          do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
          do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
          tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
                              quotient, 0, OPTAB_LIB_WIDEN);
          if (tem != quotient)
            emit_move_insn (quotient, tem);
          emit_jump_insn (gen_jump (label5));
          emit_barrier ();
          emit_label (label1);
          expand_inc (adjusted_op0, const1_rtx);
          emit_jump_insn (gen_jump (label4));
          emit_barrier ();
          emit_label (label2);
          do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
          tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
                              quotient, 0, OPTAB_LIB_WIDEN);
          if (tem != quotient)
            emit_move_insn (quotient, tem);
          emit_jump_insn (gen_jump (label5));
          emit_barrier ();
          emit_label (label3);
          expand_dec (adjusted_op0, const1_rtx);
          emit_label (label4);
          tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
                              quotient, 0, OPTAB_LIB_WIDEN);
          if (tem != quotient)
            emit_move_insn (quotient, tem);
          expand_dec (quotient, const1_rtx);
          emit_label (label5);
        }
        break;

      case CEIL_DIV_EXPR:
      case CEIL_MOD_EXPR:
        if (unsignedp)
          {
            if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
              {
                rtx t1, t2, t3;
                unsigned HOST_WIDE_INT d = INTVAL (op1);
                t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
                                   build_int_cst (NULL_TREE, floor_log2 (d)),
                                   tquotient, 1);
                t2 = expand_binop (compute_mode, and_optab, op0,
                                   GEN_INT (d - 1),
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
                t3 = gen_reg_rtx (compute_mode);
                t3 = emit_store_flag (t3, NE, t2, const0_rtx,
                                      compute_mode, 1, 1);
                if (t3 == 0)
                  {
                    rtx lab;
                    lab = gen_label_rtx ();
                    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
                    expand_inc (t1, const1_rtx);
                    emit_label (lab);
                    quotient = t1;
                  }
                else
                  quotient = force_operand (gen_rtx_PLUS (compute_mode,
                                                          t1, t3),
                                            tquotient);
                break;
              }
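            /* I.e. for d == 8 in a 32-bit mode the quotient is roughly
               (illustrative C sketch):

                 uint32_t q = (n >> 3) + ((n & 7) != 0);

               where the store-flag (or the fallback branch) supplies
               the ((n & 7) != 0) term.  */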

            /* Try using an instruction that produces both the quotient and
               remainder, using truncation.  We can easily compensate the
               quotient or remainder to get ceiling rounding, once we have the
               remainder.  Notice that we compute also the final remainder
               value here, and return the result right away.  */
            if (target == 0 || GET_MODE (target) != compute_mode)
              target = gen_reg_rtx (compute_mode);

            if (rem_flag)
              {
                remainder = (REG_P (target)
                             ? target : gen_reg_rtx (compute_mode));
                quotient = gen_reg_rtx (compute_mode);
              }
            else
              {
                quotient = (REG_P (target)
                            ? target : gen_reg_rtx (compute_mode));
                remainder = gen_reg_rtx (compute_mode);
              }

            if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
                                     remainder, 1))
              {
                /* This could be computed with a branch-less sequence.
                   Save that for later.  */
                rtx label = gen_label_rtx ();
                do_cmp_and_jump (remainder, const0_rtx, EQ,
                                 compute_mode, label);
                expand_inc (quotient, const1_rtx);
                expand_dec (remainder, op1);
                emit_label (label);
                return gen_lowpart (mode, rem_flag ? remainder : quotient);
              }
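            /* The usual fixup from truncating to ceiling division; in C
               terms (illustrative sketch):

                 if (r != 0)
                   {
                     q += 1;
                     r -= d;
                   }
            */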

            /* No luck with division elimination or divmod.  Have to do it
               by conditionally adjusting op0 *and* the result.  */
            {
              rtx label1, label2;
              rtx adjusted_op0, tem;

              quotient = gen_reg_rtx (compute_mode);
              adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
              label1 = gen_label_rtx ();
              label2 = gen_label_rtx ();
              do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
                               compute_mode, label1);
              emit_move_insn (quotient, const0_rtx);
              emit_jump_insn (gen_jump (label2));
              emit_barrier ();
              emit_label (label1);
              expand_dec (adjusted_op0, const1_rtx);
              tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
                                  quotient, 1, OPTAB_LIB_WIDEN);
              if (tem != quotient)
                emit_move_insn (quotient, tem);
              expand_inc (quotient, const1_rtx);
              emit_label (label2);
            }
          }
        else /* signed */
          {
            if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
                && INTVAL (op1) >= 0)
              {
                /* This is extremely similar to the code for the unsigned case
                   above.  For 2.7 we should merge these variants, but for
                   2.6.1 I don't want to touch the code for unsigned since that
                   gets used in C.  The signed case will only be used by other
                   languages (Ada).  */

                rtx t1, t2, t3;
                unsigned HOST_WIDE_INT d = INTVAL (op1);
                t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
                                   build_int_cst (NULL_TREE, floor_log2 (d)),
                                   tquotient, 0);
                t2 = expand_binop (compute_mode, and_optab, op0,
                                   GEN_INT (d - 1),
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
                t3 = gen_reg_rtx (compute_mode);
                t3 = emit_store_flag (t3, NE, t2, const0_rtx,
                                      compute_mode, 1, 1);
                if (t3 == 0)
                  {
                    rtx lab;
                    lab = gen_label_rtx ();
                    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
                    expand_inc (t1, const1_rtx);
                    emit_label (lab);
                    quotient = t1;
                  }
                else
                  quotient = force_operand (gen_rtx_PLUS (compute_mode,
                                                          t1, t3),
                                            tquotient);
                break;
              }

            /* Try using an instruction that produces both the quotient and
               remainder, using truncation.  We can easily compensate the
               quotient or remainder to get ceiling rounding, once we have the
               remainder.  Notice that we compute also the final remainder
               value here, and return the result right away.  */
            if (target == 0 || GET_MODE (target) != compute_mode)
              target = gen_reg_rtx (compute_mode);
            if (rem_flag)
              {
                remainder = (REG_P (target)
                             ? target : gen_reg_rtx (compute_mode));
                quotient = gen_reg_rtx (compute_mode);
              }
            else
              {
                quotient = (REG_P (target)
                            ? target : gen_reg_rtx (compute_mode));
                remainder = gen_reg_rtx (compute_mode);
              }

            if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
                                     remainder, 0))
              {
                /* This could be computed with a branch-less sequence.
                   Save that for later.  */
                rtx tem;
                rtx label = gen_label_rtx ();
                do_cmp_and_jump (remainder, const0_rtx, EQ,
                                 compute_mode, label);
                tem = expand_binop (compute_mode, xor_optab, op0, op1,
                                    NULL_RTX, 0, OPTAB_WIDEN);
                do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
                expand_inc (quotient, const1_rtx);
                expand_dec (remainder, op1);
                emit_label (label);
                return gen_lowpart (mode, rem_flag ? remainder : quotient);
              }
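            /* The same fixup, applied only when the operand signs agree
               (otherwise truncation already rounded up); in C terms
               (illustrative sketch):

                 if (r != 0 && (n ^ d) >= 0)
                   {
                     q += 1;
                     r -= d;
                   }
            */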

            /* No luck with division elimination or divmod.  Have to do it
               by conditionally adjusting op0 *and* the result.  */
            {
              rtx label1, label2, label3, label4, label5;
              rtx adjusted_op0;
              rtx tem;

              quotient = gen_reg_rtx (compute_mode);
              adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
              label1 = gen_label_rtx ();
              label2 = gen_label_rtx ();
              label3 = gen_label_rtx ();
              label4 = gen_label_rtx ();
              label5 = gen_label_rtx ();
              do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
              do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
                               compute_mode, label1);
              tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
                                  quotient, 0, OPTAB_LIB_WIDEN);
              if (tem != quotient)
                emit_move_insn (quotient, tem);
              emit_jump_insn (gen_jump (label5));
              emit_barrier ();
              emit_label (label1);
              expand_dec (adjusted_op0, const1_rtx);
              emit_jump_insn (gen_jump (label4));
              emit_barrier ();
              emit_label (label2);
              do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
                               compute_mode, label3);
              tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
                                  quotient, 0, OPTAB_LIB_WIDEN);
              if (tem != quotient)
                emit_move_insn (quotient, tem);
              emit_jump_insn (gen_jump (label5));
              emit_barrier ();
              emit_label (label3);
              expand_inc (adjusted_op0, const1_rtx);
              emit_label (label4);
              tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
                                  quotient, 0, OPTAB_LIB_WIDEN);
              if (tem != quotient)
                emit_move_insn (quotient, tem);
              expand_inc (quotient, const1_rtx);
              emit_label (label5);
            }
          }
        break;

      case EXACT_DIV_EXPR:
        if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
          {
            HOST_WIDE_INT d = INTVAL (op1);
            unsigned HOST_WIDE_INT ml;
            int pre_shift;
            rtx t1;

            pre_shift = floor_log2 (d & -d);
            ml = invert_mod2n (d >> pre_shift, size);
            t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
                               build_int_cst (NULL_TREE, pre_shift),
                               NULL_RTX, unsignedp);
            quotient = expand_mult (compute_mode, t1,
                                    gen_int_mode (ml, compute_mode),
                                    NULL_RTX, 1);

            insn = get_last_insn ();
            set_unique_reg_note (insn,
                                 REG_EQUAL,
                                 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
                                                 compute_mode,
                                                 op0, op1));
          }
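        /* Exact division multiplies by the inverse, modulo 2**size, of
           the odd part of D.  E.g. in a 32-bit mode invert_mod2n (3, 32)
           is 0xAAAAAAAB, since 3 * 0xAAAAAAAB == 1 (mod 2**32); so for
           d == 12 the expansion is roughly (illustrative C sketch,
           valid only because n is known to be a multiple of 12):

             uint32_t q = (n >> 2) * 0xAAAAAAABu;
        */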
        break;

      case ROUND_DIV_EXPR:
      case ROUND_MOD_EXPR:
        if (unsignedp)
          {
            rtx tem;
            rtx label;
            label = gen_label_rtx ();
            quotient = gen_reg_rtx (compute_mode);
            remainder = gen_reg_rtx (compute_mode);
            if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
              {
                rtx tem;
                quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
                                         quotient, 1, OPTAB_LIB_WIDEN);
                tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
                remainder = expand_binop (compute_mode, sub_optab, op0, tem,
                                          remainder, 1, OPTAB_LIB_WIDEN);
              }
            tem = plus_constant (op1, -1);
            tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
                                build_int_cst (NULL_TREE, 1),
                                NULL_RTX, 1);
            do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
            expand_inc (quotient, const1_rtx);
            expand_dec (remainder, op1);
            emit_label (label);
          }
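        /* I.e. round-to-nearest from a truncating divmod, rounding up
           (ties away from zero) when r > (d - 1) / 2; in C terms
           (illustrative sketch):

             if (r > (d - 1) / 2)
               {
                 q += 1;
                 r -= d;
               }
        */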
        else
          {
            rtx abs_rem, abs_op1, tem, mask;
            rtx label;
            label = gen_label_rtx ();
            quotient = gen_reg_rtx (compute_mode);
            remainder = gen_reg_rtx (compute_mode);
            if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
              {
                rtx tem;
                quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
                                         quotient, 0, OPTAB_LIB_WIDEN);
                tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
                remainder = expand_binop (compute_mode, sub_optab, op0, tem,
                                          remainder, 0, OPTAB_LIB_WIDEN);
              }
            abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
            abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
            tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
                                build_int_cst (NULL_TREE, 1),
                                NULL_RTX, 1);
            do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
            tem = expand_binop (compute_mode, xor_optab, op0, op1,
                                NULL_RTX, 0, OPTAB_WIDEN);
            mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
                                 build_int_cst (NULL_TREE, size - 1),
                                 NULL_RTX, 0);
            tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
                                NULL_RTX, 0, OPTAB_WIDEN);
            tem = expand_binop (compute_mode, sub_optab, tem, mask,
                                NULL_RTX, 0, OPTAB_WIDEN);
            expand_inc (quotient, tem);
            tem = expand_binop (compute_mode, xor_optab, mask, op1,
                                NULL_RTX, 0, OPTAB_WIDEN);
            tem = expand_binop (compute_mode, sub_optab, tem, mask,
                                NULL_RTX, 0, OPTAB_WIDEN);
            expand_dec (remainder, tem);
            emit_label (label);
          }
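        /* MASK is (n ^ d) >> (size - 1): all zeros when the exact
           quotient is nonnegative, all ones when it is negative, so
           (MASK ^ 1) - MASK is +1 or -1 and (MASK ^ d) - MASK is +d or
           -d.  When 2 * |r| >= |d| the quotient therefore moves away
           from zero and the remainder is corrected by the signed
           divisor; in C terms (illustrative sketch):

             int32_t s = ((n ^ d) < 0) ? -1 : 1;
             if (2u * abs_rem >= abs_op1)
               {
                 q += s;
                 r -= s * d;
               }
        */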
        return gen_lowpart (mode, rem_flag ? remainder : quotient);

      default:
        gcc_unreachable ();
      }

  if (quotient == 0)
    {
      if (target && GET_MODE (target) != compute_mode)
        target = 0;

      if (rem_flag)
        {
          /* Try to produce the remainder without producing the quotient.
             If we seem to have a divmod pattern that does not require widening,
             don't try widening here.  We should really have a WIDEN argument
             to expand_twoval_binop, since what we'd really like to do here is
             1) try a mod insn in compute_mode
             2) try a divmod insn in compute_mode
             3) try a div insn in compute_mode and multiply-subtract to get
                remainder
             4) try the same things with widening allowed.  */
          remainder
            = sign_expand_binop (compute_mode, umod_optab, smod_optab,
                                 op0, op1, target,
                                 unsignedp,
                                 ((optab2->handlers[compute_mode].insn_code
                                   != CODE_FOR_nothing)
                                  ? OPTAB_DIRECT : OPTAB_WIDEN));
          if (remainder == 0)
            {
              /* No luck there.  Can we do remainder and divide at once
                 without a library call?  */
              remainder = gen_reg_rtx (compute_mode);
              if (! expand_twoval_binop ((unsignedp
                                          ? udivmod_optab
                                          : sdivmod_optab),
                                         op0, op1,
                                         NULL_RTX, remainder, unsignedp))
                remainder = 0;
            }

          if (remainder)
            return gen_lowpart (mode, remainder);
        }

      /* Produce the quotient.  Try a quotient insn, but not a library call.
         If we have a divmod in this mode, use it in preference to widening
         the div (for this test we assume it will not fail).  Note that optab2
         is set to the one of the two optabs that the call below will use.  */
      quotient
        = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
                             op0, op1, rem_flag ? NULL_RTX : target,
                             unsignedp,
                             ((optab2->handlers[compute_mode].insn_code
                               != CODE_FOR_nothing)
                              ? OPTAB_DIRECT : OPTAB_WIDEN));

      if (quotient == 0)
        {
          /* No luck there.  Try a quotient-and-remainder insn,
             keeping the quotient alone.  */
          quotient = gen_reg_rtx (compute_mode);
          if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
                                     op0, op1,
                                     quotient, NULL_RTX, unsignedp))
            {
              quotient = 0;
              if (! rem_flag)
                /* Still no luck.  If we are not computing the remainder,
                   use a library call for the quotient.  */
                quotient = sign_expand_binop (compute_mode,
                                              udiv_optab, sdiv_optab,
                                              op0, op1, target,
                                              unsignedp, OPTAB_LIB_WIDEN);
            }
        }
    }

  if (rem_flag)
    {
      if (target && GET_MODE (target) != compute_mode)
        target = 0;

      if (quotient == 0)
        {
          /* No divide instruction either.  Use library for remainder.  */
          remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
                                         op0, op1, target,
                                         unsignedp, OPTAB_LIB_WIDEN);
          /* No remainder function.  Try a quotient-and-remainder
             function, keeping the remainder.  */
          if (!remainder)
            {
              remainder = gen_reg_rtx (compute_mode);
              if (!expand_twoval_binop_libfunc
                  (unsignedp ? udivmod_optab : sdivmod_optab,
                   op0, op1,
                   NULL_RTX, remainder,
                   unsignedp ? UMOD : MOD))
                remainder = NULL_RTX;
            }
        }
      else
        {
          /* We divided.  Now finish doing X - Y * (X / Y).  */
          remainder = expand_mult (compute_mode, quotient, op1,
                                   NULL_RTX, unsignedp);
          remainder = expand_binop (compute_mode, sub_optab, op0,
                                    remainder, target, unsignedp,
                                    OPTAB_LIB_WIDEN);
        }
    }

  return gen_lowpart (mode, rem_flag ? remainder : quotient);
}

/* Return a tree node with data type TYPE, describing the value of X.
   Usually this is a VAR_DECL, if there is no obvious better choice.
   X may be an expression; however, we only support those expressions
   generated by loop.c.  */

tree
make_tree (tree type, rtx x)
{
  tree t;

  switch (GET_CODE (x))
    {
    case CONST_INT:
      {
        HOST_WIDE_INT hi = 0;

        if (INTVAL (x) < 0
            && !(TYPE_UNSIGNED (type)
                 && (GET_MODE_BITSIZE (TYPE_MODE (type))
                     < HOST_BITS_PER_WIDE_INT)))
          hi = -1;

        t = build_int_cst_wide (type, INTVAL (x), hi);

        return t;
      }

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
        t = build_int_cst_wide (type,
                                CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
      else
        {
          REAL_VALUE_TYPE d;

          REAL_VALUE_FROM_CONST_DOUBLE (d, x);
          t = build_real (type, d);
        }

      return t;

    case CONST_VECTOR:
      {
        int i, units;
        rtx elt;
        tree t = NULL_TREE;

        units = CONST_VECTOR_NUNITS (x);

        /* Build a tree with vector elements.  */
        for (i = units - 1; i >= 0; --i)
          {
            elt = CONST_VECTOR_ELT (x, i);
            t = tree_cons (NULL_TREE, make_tree (type, elt), t);
          }

        return build_vector (type, t);
      }

    case PLUS:
      return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
                          make_tree (type, XEXP (x, 1)));

    case MINUS:
      return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
                          make_tree (type, XEXP (x, 1)));

    case NEG:
      return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));

    case MULT:
      return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
                          make_tree (type, XEXP (x, 1)));

    case ASHIFT:
      return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
                          make_tree (type, XEXP (x, 1)));

    case LSHIFTRT:
      t = lang_hooks.types.unsigned_type (type);
      return fold_convert (type, build2 (RSHIFT_EXPR, t,
                                         make_tree (t, XEXP (x, 0)),
                                         make_tree (type, XEXP (x, 1))));

    case ASHIFTRT:
      t = lang_hooks.types.signed_type (type);
      return fold_convert (type, build2 (RSHIFT_EXPR, t,
                                         make_tree (t, XEXP (x, 0)),
                                         make_tree (type, XEXP (x, 1))));

    case DIV:
      if (TREE_CODE (type) != REAL_TYPE)
        t = lang_hooks.types.signed_type (type);
      else
        t = type;

      return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
                                         make_tree (t, XEXP (x, 0)),
                                         make_tree (t, XEXP (x, 1))));
    case UDIV:
      t = lang_hooks.types.unsigned_type (type);
      return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
                                         make_tree (t, XEXP (x, 0)),
                                         make_tree (t, XEXP (x, 1))));

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
                                          GET_CODE (x) == ZERO_EXTEND);
      return fold_convert (type, make_tree (t, XEXP (x, 0)));

    default:
      t = build_decl (VAR_DECL, NULL_TREE, type);

      /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
         ptr_mode.  So convert.  */
      if (POINTER_TYPE_P (type))
        x = convert_memory_address (TYPE_MODE (type), x);

      /* Note that we do *not* use SET_DECL_RTL here, because we do not
         want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
      t->decl_with_rtl.rtl = x;

      return t;
    }
}

/* Check whether the multiplication X * MULT + ADD overflows.
   X, MULT and ADD must be CONST_*.
   MODE is the machine mode for the computation.
   X and MULT must have mode MODE.  ADD may have a different mode
   (it defaults to MODE).
   UNSIGNEDP is nonzero to do unsigned multiplication.  */

bool
const_mult_add_overflow_p (rtx x, rtx mult, rtx add,
                           enum machine_mode mode, int unsignedp)
{
  tree type, mult_type, add_type, result;

  type = lang_hooks.types.type_for_mode (mode, unsignedp);

  /* In order to get a proper overflow indication from an unsigned
     type, we have to pretend that it's a sizetype.  */
  mult_type = type;
  if (unsignedp)
    {
      /* FIXME: It would be nice if we could step directly from this
         type to its sizetype equivalent.  */
      mult_type = build_distinct_type_copy (type);
      TYPE_IS_SIZETYPE (mult_type) = 1;
    }

  add_type = (GET_MODE (add) == VOIDmode ? mult_type
              : lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp));

  result = fold_build2 (PLUS_EXPR, mult_type,
                        fold_build2 (MULT_EXPR, mult_type,
                                     make_tree (mult_type, x),
                                     make_tree (mult_type, mult)),
                        make_tree (add_type, add));

  return TREE_CONSTANT_OVERFLOW (result);
}

/* Return an rtx representing the value of X * MULT + ADD.
   TARGET is a suggestion for where to store the result (an rtx).
   MODE is the machine mode for the computation.
   X and MULT must have mode MODE.  ADD may have a different mode
   (it defaults to MODE).
   UNSIGNEDP is nonzero to do unsigned multiplication.
   This may emit insns.  */

rtx
expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
                 int unsignedp)
{
  tree type = lang_hooks.types.type_for_mode (mode, unsignedp);
  tree add_type = (GET_MODE (add) == VOIDmode
                   ? type : lang_hooks.types.type_for_mode (GET_MODE (add),
                                                            unsignedp));
  tree result = fold_build2 (PLUS_EXPR, type,
                             fold_build2 (MULT_EXPR, type,
                                          make_tree (type, x),
                                          make_tree (type, mult)),
                             make_tree (add_type, add));

  return expand_expr (result, target, VOIDmode, 0);
}

/* Compute the logical-and of OP0 and OP1, storing it in TARGET
   and returning TARGET.

   If TARGET is 0, a pseudo-register or constant is returned.  */

rtx
expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
{
  rtx tem = 0;

  if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
    tem = simplify_binary_operation (AND, mode, op0, op1);
  if (tem == 0)
    tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);

  if (target == 0)
    target = tem;
  else if (tem != target)
    emit_move_insn (target, tem);
  return target;
}

/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
   and storing in TARGET.  Normally return TARGET.
   Return 0 if that cannot be done.

   MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
   it is VOIDmode, they cannot both be CONST_INT.

   UNSIGNEDP is for the case where we have to widen the operands
   to perform the operation.  It says to use zero-extension.

   NORMALIZEP is 1 if we should convert the result to be either zero
   or one.  NORMALIZEP is -1 if we should convert the result to be
   either zero or -1.  If NORMALIZEP is zero, the result will be left
   "raw" out of the scc insn.  */

rtx
emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
                 enum machine_mode mode, int unsignedp, int normalizep)
{
  rtx subtarget;
  enum insn_code icode;
  enum machine_mode compare_mode;
  enum machine_mode target_mode = GET_MODE (target);
  rtx tem;
  rtx last = get_last_insn ();
  rtx pattern, comparison;

  if (unsignedp)
    code = unsigned_condition (code);

  /* If one operand is constant, make it the second one.  Only do this
     if the other operand is not constant as well.  */

  if (swap_commutative_operands_p (op0, op1))
    {
      tem = op0;
      op0 = op1;
      op1 = tem;
      code = swap_condition (code);
    }

  if (mode == VOIDmode)
    mode = GET_MODE (op0);

  /* For some comparisons with 1 and -1, we can convert this to
     comparisons with zero.  This will often produce more opportunities for
     store-flag insns.  */

  switch (code)
    {
    case LT:
      if (op1 == const1_rtx)
        op1 = const0_rtx, code = LE;
      break;
    case LE:
      if (op1 == constm1_rtx)
        op1 = const0_rtx, code = LT;
      break;
    case GE:
      if (op1 == const1_rtx)
        op1 = const0_rtx, code = GT;
      break;
    case GT:
      if (op1 == constm1_rtx)
        op1 = const0_rtx, code = GE;
      break;
    case GEU:
      if (op1 == const1_rtx)
        op1 = const0_rtx, code = NE;
      break;
    case LTU:
      if (op1 == const1_rtx)
        op1 = const0_rtx, code = EQ;
      break;
    default:
      break;
    }
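  /* E.g. a signed x < 1 becomes x <= 0, and an unsigned x >= 1 becomes
     x != 0; in C terms (illustrative):

       (x <  1)   ==  (x <= 0)       signed
       (x >= 1u)  ==  (x != 0)       unsigned
       (x <  1u)  ==  (x == 0)       unsigned
  */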
5202
 
5203
  /* If we are comparing a double-word integer with zero or -1, we can
5204
     convert the comparison into one involving a single word.  */
5205
  if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5206
      && GET_MODE_CLASS (mode) == MODE_INT
5207
      && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5208
    {
5209
      if ((code == EQ || code == NE)
5210
          && (op1 == const0_rtx || op1 == constm1_rtx))
5211
        {
5212
          rtx op00, op01, op0both;
5213
 
5214
          /* Do a logical OR or AND of the two words and compare the result.  */
5215
          op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5216
          op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5217
          op0both = expand_binop (word_mode,
5218
                                  op1 == const0_rtx ? ior_optab : and_optab,
5219
                                  op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
5220
 
5221
          if (op0both != 0)
5222
            return emit_store_flag (target, code, op0both, op1, word_mode,
5223
                                    unsignedp, normalizep);
5224
        }
5225
      else if ((code == LT || code == GE) && op1 == const0_rtx)
5226
        {
5227
          rtx op0h;
5228
 
5229
          /* If testing the sign bit, can just test on high word.  */
5230
          op0h = simplify_gen_subreg (word_mode, op0, mode,
5231
                                      subreg_highpart_offset (word_mode, mode));
5232
          return emit_store_flag (target, code, op0h, op1, word_mode,
5233
                                  unsignedp, normalizep);
5234
        }
5235
    }
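  /* E.g. for a double-word X made of single words LO and HI
     (illustrative C sketch, 32-bit words):

       (x ==  0)  ==  ((lo | hi) == 0)
       (x == -1)  ==  ((lo & hi) == 0xFFFFFFFF)
       (x <   0)  ==  ((int32_t) hi < 0)
  */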

  /* From now on, we won't change CODE, so set ICODE now.  */
  icode = setcc_gen_code[(int) code];

  /* If this is A < 0 or A >= 0, we can do this by taking the ones
     complement of A (for GE) and shifting the sign bit to the low bit.  */
  if (op1 == const0_rtx && (code == LT || code == GE)
      && GET_MODE_CLASS (mode) == MODE_INT
      && (normalizep || STORE_FLAG_VALUE == 1
          || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
              && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
                  == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
    {
      subtarget = target;

      /* If the result is to be wider than OP0, it is best to convert it
         first.  If it is to be narrower, it is *incorrect* to convert it
         first.  */
      if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
        {
          op0 = convert_modes (target_mode, mode, op0, 0);
          mode = target_mode;
        }

      if (target_mode != mode)
        subtarget = 0;

      if (code == GE)
        op0 = expand_unop (mode, one_cmpl_optab, op0,
                           ((STORE_FLAG_VALUE == 1 || normalizep)
                            ? 0 : subtarget), 0);

      if (STORE_FLAG_VALUE == 1 || normalizep)
        /* If we are supposed to produce a 0/1 value, we want to do
           a logical shift from the sign bit to the low-order bit; for
           a -1/0 value, we do an arithmetic shift.  */
        op0 = expand_shift (RSHIFT_EXPR, mode, op0,
                            size_int (GET_MODE_BITSIZE (mode) - 1),
                            subtarget, normalizep != -1);

      if (mode != target_mode)
        op0 = convert_modes (target_mode, mode, op0, 0);

      return op0;
    }
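
  /* For a 32-bit A this computes A < 0 as the logical shift
     (unsigned) A >> 31, which is 1 exactly when the sign bit is set;
     for A >= 0 it first complements A so that the sign bit is set exactly
     when A was nonnegative.  The arithmetic variant of the shift yields
     -1/0 instead of 1/0.  */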
5281
 
5282
  if (icode != CODE_FOR_nothing)
5283
    {
5284
      insn_operand_predicate_fn pred;
5285
 
5286
      /* We think we may be able to do this with a scc insn.  Emit the
5287
         comparison and then the scc insn.  */
5288
 
5289
      do_pending_stack_adjust ();
5290
      last = get_last_insn ();
5291
 
5292
      comparison
5293
        = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
5294
      if (CONSTANT_P (comparison))
5295
        {
5296
          switch (GET_CODE (comparison))
5297
            {
5298
            case CONST_INT:
5299
              if (comparison == const0_rtx)
5300
                return const0_rtx;
5301
              break;
5302
 
5303
#ifdef FLOAT_STORE_FLAG_VALUE
5304
            case CONST_DOUBLE:
5305
              if (comparison == CONST0_RTX (GET_MODE (comparison)))
5306
                return const0_rtx;
5307
              break;
5308
#endif
5309
            default:
5310
              gcc_unreachable ();
5311
            }
5312
 
5313
          if (normalizep == 1)
5314
            return const1_rtx;
5315
          if (normalizep == -1)
5316
            return constm1_rtx;
5317
          return const_true_rtx;
5318
        }
5319
 
5320
      /* The code of COMPARISON may not match CODE if compare_from_rtx
5321
         decided to swap its operands and reverse the original code.
5322
 
5323
         We know that compare_from_rtx returns either a CONST_INT or
5324
         a new comparison code, so it is safe to just extract the
5325
         code from COMPARISON.  */
5326
      code = GET_CODE (comparison);
5327
 
5328
      /* Get a reference to the target in the proper mode for this insn.  */
5329
      compare_mode = insn_data[(int) icode].operand[0].mode;
5330
      subtarget = target;
5331
      pred = insn_data[(int) icode].operand[0].predicate;
5332
      if (optimize || ! (*pred) (subtarget, compare_mode))
5333
        subtarget = gen_reg_rtx (compare_mode);
5334
 
5335
      pattern = GEN_FCN (icode) (subtarget);
5336
      if (pattern)
5337
        {
5338
          emit_insn (pattern);
5339
 
5340
          /* If we are converting to a wider mode, first convert to
5341
             TARGET_MODE, then normalize.  This produces better combining
5342
             opportunities on machines that have a SIGN_EXTRACT when we are
5343
             testing a single bit.  This mostly benefits the 68k.
5344
 
5345
             If STORE_FLAG_VALUE does not have the sign bit set when
5346
             interpreted in COMPARE_MODE, we can do this conversion as
5347
             unsigned, which is usually more efficient.  */
5348
          if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
5349
            {
5350
              convert_move (target, subtarget,
5351
                            (GET_MODE_BITSIZE (compare_mode)
5352
                             <= HOST_BITS_PER_WIDE_INT)
5353
                            && 0 == (STORE_FLAG_VALUE
5354
                                     & ((HOST_WIDE_INT) 1
5355
                                        << (GET_MODE_BITSIZE (compare_mode) -1))));
5356
              op0 = target;
5357
              compare_mode = target_mode;
5358
            }
5359
          else
5360
            op0 = subtarget;
5361
 
5362
          /* If we want to keep subexpressions around, don't reuse our
5363
             last target.  */
5364
 
5365
          if (optimize)
5366
            subtarget = 0;
5367
 
5368
          /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
5369
             we don't have to do anything.  */
5370
          if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5371
            ;
5372
          /* STORE_FLAG_VALUE might be the most negative number, so write
5373
             the comparison this way to avoid a compiler-time warning.  */
5374
          else if (- normalizep == STORE_FLAG_VALUE)
5375
            op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
5376
 
5377
          /* We don't want to use STORE_FLAG_VALUE < 0 below since this
5378
             makes it hard to use a value of just the sign bit due to
5379
             ANSI integer constant typing rules.  */
5380
          else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
5381
                   && (STORE_FLAG_VALUE
5382
                       & ((HOST_WIDE_INT) 1
5383
                          << (GET_MODE_BITSIZE (compare_mode) - 1))))
5384
            op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
5385
                                size_int (GET_MODE_BITSIZE (compare_mode) - 1),
5386
                                subtarget, normalizep == 1);
5387
          else
5388
            {
5389
              gcc_assert (STORE_FLAG_VALUE & 1);
5390
 
5391
              op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
5392
              if (normalizep == -1)
5393
                op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
5394
            }
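
          /* For example, a machine whose scc leaves STORE_FLAG_VALUE == -1
             needs only a negation to produce the 0/1 form, one whose
             STORE_FLAG_VALUE is just the sign bit needs a shift by
             (bitsize - 1), and any STORE_FLAG_VALUE with bit 0 set can be
             normalized by masking with AND 1.  */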

          /* If we were converting to a smaller mode, do the
             conversion now.  */
          if (target_mode != compare_mode)
            {
              convert_move (target, op0, 0);
              return target;
            }
          else
            return op0;
        }
    }

  delete_insns_since (last);

  /* If optimizing, use different pseudo registers for each insn, instead
     of reusing the same pseudo.  This leads to better CSE, but slows
     down the compiler, since there are more pseudos.  */
  subtarget = (!optimize
               && (target_mode == mode)) ? target : NULL_RTX;

  /* If we reached here, we can't do this with a scc insn.  However, there
     are some comparisons that can be done directly.  For example, if
     this is an equality comparison of integers, we can try to exclusive-or
     (or subtract) the two operands and use a recursive call to try the
     comparison with zero.  Don't do any of these cases if branches are
     very cheap.  */

  if (BRANCH_COST > 0
      && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
      && op1 != const0_rtx)
    {
      tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
                          OPTAB_WIDEN);

      if (tem == 0)
        tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
                            OPTAB_WIDEN);
      if (tem != 0)
        tem = emit_store_flag (target, code, tem, const0_rtx,
                               mode, unsignedp, normalizep);
      if (tem == 0)
        delete_insns_since (last);
      return tem;
    }
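
  /* This reduction is valid because OP0 == OP1 exactly when
     OP0 ^ OP1 == 0 (and likewise OP0 - OP1 == 0), so the recursive call
     only ever has to handle comparison with zero.  */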

  /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
     the constant zero.  Reject all other comparisons at this point.  Only
     do LE and GT if branches are expensive since they are expensive on
     2-operand machines.  */

  if (BRANCH_COST == 0
      || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
      || (code != EQ && code != NE
          && (BRANCH_COST <= 1 || (code != LE && code != GT))))
    return 0;

  /* See what we need to return.  We can only return a 1, -1, or the
     sign bit.  */

  if (normalizep == 0)
    {
      if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
        normalizep = STORE_FLAG_VALUE;

      else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
                   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
        ;
      else
        return 0;
    }

  /* Try to put the result of the comparison in the sign bit.  Assume we can't
     do the necessary operation below.  */

  tem = 0;

  /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
     the sign bit set.  */

  if (code == LE)
    {
      /* This is destructive, so SUBTARGET can't be OP0.  */
      if (rtx_equal_p (subtarget, op0))
        subtarget = 0;

      tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
                          OPTAB_WIDEN);
      if (tem)
        tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
                            OPTAB_WIDEN);
    }
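
  /* Worked in 8 bits: A = 3 gives 3 | 2 = 3, sign bit clear; A = 0 gives
     0 | -1 = -1, sign bit set; A = -3 gives -3 | -4 = -3, sign bit set.
     The sign bit of A | (A - 1) is thus set exactly when A <= 0.  */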

  /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
     number of bits in the mode of OP0, minus one.  */

  if (code == GT)
    {
      if (rtx_equal_p (subtarget, op0))
        subtarget = 0;

      tem = expand_shift (RSHIFT_EXPR, mode, op0,
                          size_int (GET_MODE_BITSIZE (mode) - 1),
                          subtarget, 0);
      tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
                          OPTAB_WIDEN);
    }
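
  /* Worked in 8 bits (BITS = 7): A = 5 gives (5 >> 7) - 5 = -5, sign bit
     set; A = 0 gives 0 - 0 = 0, sign bit clear; A = -5 gives
     -1 - (-5) = 4, sign bit clear.  The sign bit of the result is thus
     set exactly when A > 0.  */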

  if (code == EQ || code == NE)
    {
      /* For EQ or NE, one way to do the comparison is to apply an operation
         that converts the operand into a positive number if it is nonzero
         or zero if it was originally zero.  Then, for EQ, we subtract 1 and
         for NE we negate.  This puts the result in the sign bit.  Then we
         normalize with a shift, if needed.

         Two operations that can do the above actions are ABS and FFS, so try
         them.  If that doesn't work, and MODE is smaller than a full word,
         we can use zero-extension to the wider mode (an unsigned conversion)
         as the operation.  */

      /* Note that ABS doesn't yield a positive number for INT_MIN, but
         that is compensated by the subsequent overflow when subtracting
         one / negating.  */
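
      /* In 8 bits, for instance: A = 5 gives ABS = 5; for EQ, 5 - 1 = 4
         has the sign bit clear, while A = 0 gives 0 - 1 = -1 with the
         sign bit set.  For NE, -5 has the sign bit set, while A = 0
         negates to 0 with the sign bit clear.  Even A = -128, where ABS
         wraps to -128, comes out right: for EQ, -128 - 1 wraps to 127
         (sign clear); for NE, -(-128) wraps to -128 (sign set).  */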

      if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
        tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
      else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
        tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
      else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
        {
          tem = convert_modes (word_mode, mode, op0, 1);
          mode = word_mode;
        }

      if (tem != 0)
        {
          if (code == EQ)
            tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
                                0, OPTAB_WIDEN);
          else
            tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
        }

      /* If we couldn't do it that way, for NE we can "or" the two's complement
         of the value with itself.  For EQ, we take the one's complement of
         that "or", which is an extra insn, so we only handle EQ if branches
         are expensive.  */
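
      /* Again in 8 bits: A = 5 gives -5 | 5 = -1, sign bit set, while
         A = 0 gives 0 | 0 = 0, sign bit clear, so the sign bit of -A | A
         is set exactly when A is nonzero.  */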

      if (tem == 0 && (code == NE || BRANCH_COST > 1))
        {
          if (rtx_equal_p (subtarget, op0))
            subtarget = 0;

          tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
          tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
                              OPTAB_WIDEN);

          if (tem && code == EQ)
            tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
        }
    }

  if (tem && normalizep)
    tem = expand_shift (RSHIFT_EXPR, mode, tem,
                        size_int (GET_MODE_BITSIZE (mode) - 1),
                        subtarget, normalizep == 1);

  if (tem)
    {
      if (GET_MODE (tem) != target_mode)
        {
          convert_move (target, tem, 0);
          tem = target;
        }
      else if (!subtarget)
        {
          emit_move_insn (target, tem);
          tem = target;
        }
    }
  else
    delete_insns_since (last);

  return tem;
}

/* Like emit_store_flag, but always succeeds.  */

rtx
emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
                       enum machine_mode mode, int unsignedp, int normalizep)
{
  rtx tem, label;

  /* First see if emit_store_flag can do the job.  */
  tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
  if (tem != 0)
    return tem;

  if (normalizep == 0)
    normalizep = 1;

  /* If this failed, we have to do this with set/compare/jump/set code.  */
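
  /* That is, the fallback emits:
       TARGET = 1; if (OP0 <CODE> OP1) goto LABEL; TARGET = 0; LABEL:
     so TARGET ends up 1 exactly when the comparison holds.  */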

  if (!REG_P (target)
      || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
    target = gen_reg_rtx (GET_MODE (target));

  emit_move_insn (target, const1_rtx);
  label = gen_label_rtx ();
  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
                           NULL_RTX, label);

  emit_move_insn (target, const0_rtx);
  emit_label (label);

  return target;
}

/* Perform a possibly multi-word comparison and conditional jump to LABEL
   if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.

   The algorithm is based on the code in expr.c:do_jump.

   Note that this does not perform a general comparison.  Only
   variants generated within expmed.c are correctly handled; others
   could be handled if needed.  */
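
/* The dispatch below relies on do_jump_by_parts_greater_rtx taking its
   if-false label before its if-true label: ARG1 LTU ARG2, for example,
   is handled as ARG2 GTU ARG1 with the same labels, while ARG1 LEU ARG2
   is handled as ARG1 GTU ARG2 with the two labels exchanged, so a single
   by-parts greater-than routine covers all of the orderings.  */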

static void
do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
                 rtx label)
{
  /* If this mode is an integer too wide to compare properly,
     compare word by word.  Rely on cse to optimize constant cases.  */

  if (GET_MODE_CLASS (mode) == MODE_INT
      && ! can_compare_p (op, mode, ccp_jump))
    {
      rtx label2 = gen_label_rtx ();

      switch (op)
        {
        case LTU:
          do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
          break;

        case LEU:
          do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
          break;

        case LT:
          do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
          break;

        case GT:
          do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
          break;

        case GE:
          do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
          break;

          /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
             those are the only equality comparisons we do.  */
        case EQ:
          gcc_assert (arg2 == const0_rtx && mode == GET_MODE (arg1));
          do_jump_by_parts_equality_rtx (arg1, label2, label);
          break;

        case NE:
          gcc_assert (arg2 == const0_rtx && mode == GET_MODE (arg1));
          do_jump_by_parts_equality_rtx (arg1, label, label2);
          break;

        default:
          gcc_unreachable ();
        }

      emit_label (label2);
    }
  else
    emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
}
