/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
 
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"
#include "tree-gimple.h"
#include "dwarf2.h"
 
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)                                        \
  ((mode) == QImode ? 0                                         \
   : (mode) == HImode ? 1                                       \
   : (mode) == SImode ? 2                                       \
   : (mode) == DImode ? 3                                       \
   : 4)
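/* Usage sketch (field names as defined for struct processor_costs in
   i386.h): ix86_rtx_costs, declared below, looks up multiply and divide
   costs as ix86_cost->mult_init[MODE_INDEX (mode)] and
   ix86_cost->divide[MODE_INDEX (mode)]; any mode wider than DImode
   falls through to the last table slot.  */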
 
/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {    /* costs for tuning for size */
  2,                                    /* cost of an add instruction */
  3,                                    /* cost of a lea instruction */
  2,                                    /* variable shift costs */
  3,                                    /* constant shift costs */
  {3, 3, 3, 3, 5},                      /* cost of starting a multiply */
  0,                                    /* cost of multiply per bit set */
  {3, 3, 3, 3, 5},                      /* cost of a divide/mod */
  3,                                    /* cost of movsx */
  3,                                    /* cost of movzx */
  0,                                    /* "large" insn */
  2,                                    /* MOVE_RATIO */
  2,                                    /* cost for loading QImode using movzbl */
  {2, 2, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 2, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 2},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {2, 2, 2},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  3,                                    /* cost of moving MMX register */
  {3, 3},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {3, 3},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  3,                                    /* cost of moving SSE register */
  {3, 3, 3},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {3, 3, 3},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  2,                                    /* cost of FADD and FSUB insns.  */
  2,                                    /* cost of FMUL instruction.  */
  2,                                    /* cost of FDIV instruction.  */
  2,                                    /* cost of FABS instruction.  */
  2,                                    /* cost of FCHS instruction.  */
  2,                                    /* cost of FSQRT instruction.  */
};
 
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {    /* 386 specific costs */
  1,                                    /* cost of an add instruction */
  1,                                    /* cost of a lea instruction */
  3,                                    /* variable shift costs */
  2,                                    /* constant shift costs */
  {6, 6, 6, 6, 6},                      /* cost of starting a multiply */
  1,                                    /* cost of multiply per bit set */
  {23, 23, 23, 23, 23},                 /* cost of a divide/mod */
  3,                                    /* cost of movsx */
  2,                                    /* cost of movzx */
  15,                                   /* "large" insn */
  3,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {2, 4, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 4, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {8, 8, 8},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {8, 8, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {4, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 8, 16},                           /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 8, 16},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  23,                                   /* cost of FADD and FSUB insns.  */
  27,                                   /* cost of FMUL instruction.  */
  88,                                   /* cost of FDIV instruction.  */
  22,                                   /* cost of FABS instruction.  */
  24,                                   /* cost of FCHS instruction.  */
  122,                                  /* cost of FSQRT instruction.  */
};
 
static const
struct processor_costs i486_cost = {    /* 486 specific costs */
  1,                                    /* cost of an add instruction */
  1,                                    /* cost of a lea instruction */
  3,                                    /* variable shift costs */
  2,                                    /* constant shift costs */
  {12, 12, 12, 12, 12},                 /* cost of starting a multiply */
  1,                                    /* cost of multiply per bit set */
  {40, 40, 40, 40, 40},                 /* cost of a divide/mod */
  3,                                    /* cost of movsx */
  2,                                    /* cost of movzx */
  15,                                   /* "large" insn */
  3,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {2, 4, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 4, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {8, 8, 8},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {8, 8, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {4, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 8, 16},                           /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 8, 16},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  8,                                    /* cost of FADD and FSUB insns.  */
  16,                                   /* cost of FMUL instruction.  */
  73,                                   /* cost of FDIV instruction.  */
  3,                                    /* cost of FABS instruction.  */
  3,                                    /* cost of FCHS instruction.  */
  83,                                   /* cost of FSQRT instruction.  */
};
 
static const
struct processor_costs pentium_cost = {
  1,                                    /* cost of an add instruction */
  1,                                    /* cost of a lea instruction */
  4,                                    /* variable shift costs */
  1,                                    /* constant shift costs */
  {11, 11, 11, 11, 11},                 /* cost of starting a multiply */
  0,                                    /* cost of multiply per bit set */
  {25, 25, 25, 25, 25},                 /* cost of a divide/mod */
  3,                                    /* cost of movsx */
  2,                                    /* cost of movzx */
  8,                                    /* "large" insn */
  6,                                    /* MOVE_RATIO */
  6,                                    /* cost for loading QImode using movzbl */
  {2, 4, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 4, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 6},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  8,                                    /* cost of moving MMX register */
  {8, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {8, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 8, 16},                           /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 8, 16},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  3,                                    /* cost of FADD and FSUB insns.  */
  3,                                    /* cost of FMUL instruction.  */
  39,                                   /* cost of FDIV instruction.  */
  1,                                    /* cost of FABS instruction.  */
  1,                                    /* cost of FCHS instruction.  */
  70,                                   /* cost of FSQRT instruction.  */
};
 
static const
struct processor_costs pentiumpro_cost = {
  1,                                    /* cost of an add instruction */
  1,                                    /* cost of a lea instruction */
  1,                                    /* variable shift costs */
  1,                                    /* constant shift costs */
  {4, 4, 4, 4, 4},                      /* cost of starting a multiply */
  0,                                    /* cost of multiply per bit set */
  {17, 17, 17, 17, 17},                 /* cost of a divide/mod */
  1,                                    /* cost of movsx */
  1,                                    /* cost of movzx */
  8,                                    /* "large" insn */
  6,                                    /* MOVE_RATIO */
  2,                                    /* cost for loading QImode using movzbl */
  {4, 4, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 2, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 6},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {2, 2},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {2, 2},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {2, 2, 8},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {2, 2, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  32,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  3,                                    /* cost of FADD and FSUB insns.  */
  5,                                    /* cost of FMUL instruction.  */
  56,                                   /* cost of FDIV instruction.  */
  2,                                    /* cost of FABS instruction.  */
  2,                                    /* cost of FCHS instruction.  */
  56,                                   /* cost of FSQRT instruction.  */
};
 
static const
struct processor_costs k6_cost = {
  1,                                    /* cost of an add instruction */
  2,                                    /* cost of a lea instruction */
  1,                                    /* variable shift costs */
  1,                                    /* constant shift costs */
  {3, 3, 3, 3, 3},                      /* cost of starting a multiply */
  0,                                    /* cost of multiply per bit set */
  {18, 18, 18, 18, 18},                 /* cost of a divide/mod */
  2,                                    /* cost of movsx */
  2,                                    /* cost of movzx */
  8,                                    /* "large" insn */
  4,                                    /* MOVE_RATIO */
  3,                                    /* cost for loading QImode using movzbl */
  {4, 5, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 3, 2},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {6, 6, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 4},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {2, 2},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {2, 2},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {2, 2, 8},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {2, 2, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  6,                                    /* MMX or SSE register to integer */
  32,                                   /* size of prefetch block */
  1,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  2,                                    /* cost of FADD and FSUB insns.  */
  2,                                    /* cost of FMUL instruction.  */
  56,                                   /* cost of FDIV instruction.  */
  2,                                    /* cost of FABS instruction.  */
  2,                                    /* cost of FCHS instruction.  */
  56,                                   /* cost of FSQRT instruction.  */
};
 
static const
struct processor_costs athlon_cost = {
  1,                                    /* cost of an add instruction */
  2,                                    /* cost of a lea instruction */
  1,                                    /* variable shift costs */
  1,                                    /* constant shift costs */
  {5, 5, 5, 5, 5},                      /* cost of starting a multiply */
  0,                                    /* cost of multiply per bit set */
  {18, 26, 42, 74, 74},                 /* cost of a divide/mod */
  1,                                    /* cost of movsx */
  1,                                    /* cost of movzx */
  8,                                    /* "large" insn */
  9,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {3, 4, 3},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {3, 4, 3},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {4, 4, 12},                           /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {4, 4},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 4, 6},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 5},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  5,                                    /* MMX or SSE register to integer */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  5,                                    /* Branch cost */
  4,                                    /* cost of FADD and FSUB insns.  */
  4,                                    /* cost of FMUL instruction.  */
  24,                                   /* cost of FDIV instruction.  */
  2,                                    /* cost of FABS instruction.  */
  2,                                    /* cost of FCHS instruction.  */
  35,                                   /* cost of FSQRT instruction.  */
};
 
static const
struct processor_costs k8_cost = {
  1,                                    /* cost of an add instruction */
  2,                                    /* cost of a lea instruction */
  1,                                    /* variable shift costs */
  1,                                    /* constant shift costs */
  {3, 4, 3, 4, 5},                      /* cost of starting a multiply */
  0,                                    /* cost of multiply per bit set */
  {18, 26, 42, 74, 74},                 /* cost of a divide/mod */
  1,                                    /* cost of movsx */
  1,                                    /* cost of movzx */
  8,                                    /* "large" insn */
  9,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {3, 4, 3},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {3, 4, 3},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {4, 4, 12},                           /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {3, 3},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 3, 6},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 5},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  5,                                    /* MMX or SSE register to integer */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  5,                                    /* Branch cost */
  4,                                    /* cost of FADD and FSUB insns.  */
  4,                                    /* cost of FMUL instruction.  */
  19,                                   /* cost of FDIV instruction.  */
  2,                                    /* cost of FABS instruction.  */
  2,                                    /* cost of FCHS instruction.  */
  35,                                   /* cost of FSQRT instruction.  */
};
 
static const
struct processor_costs pentium4_cost = {
  1,                                    /* cost of an add instruction */
  3,                                    /* cost of a lea instruction */
  4,                                    /* variable shift costs */
  4,                                    /* constant shift costs */
  {15, 15, 15, 15, 15},                 /* cost of starting a multiply */
  0,                                    /* cost of multiply per bit set */
  {56, 56, 56, 56, 56},                 /* cost of a divide/mod */
  1,                                    /* cost of movsx */
  1,                                    /* cost of movzx */
  16,                                   /* "large" insn */
  6,                                    /* MOVE_RATIO */
  2,                                    /* cost for loading QImode using movzbl */
  {4, 5, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 3, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 6},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {2, 2},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {2, 2},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  12,                                   /* cost of moving SSE register */
  {12, 12, 12},                         /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {2, 2, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  10,                                   /* MMX or SSE register to integer */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  5,                                    /* cost of FADD and FSUB insns.  */
  7,                                    /* cost of FMUL instruction.  */
  43,                                   /* cost of FDIV instruction.  */
  2,                                    /* cost of FABS instruction.  */
  2,                                    /* cost of FCHS instruction.  */
  43,                                   /* cost of FSQRT instruction.  */
};
 
static const
struct processor_costs nocona_cost = {
  1,                                    /* cost of an add instruction */
  1,                                    /* cost of a lea instruction */
  1,                                    /* variable shift costs */
  1,                                    /* constant shift costs */
  {10, 10, 10, 10, 10},                 /* cost of starting a multiply */
  0,                                    /* cost of multiply per bit set */
  {66, 66, 66, 66, 66},                 /* cost of a divide/mod */
  1,                                    /* cost of movsx */
  1,                                    /* cost of movzx */
  16,                                   /* "large" insn */
  17,                                   /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {4, 4, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {4, 4, 4},                            /* cost of storing integer registers */
  3,                                    /* cost of reg,reg fld/fst */
  {12, 12, 12},                         /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 4},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  6,                                    /* cost of moving MMX register */
  {12, 12},                             /* cost of loading MMX registers
                                           in SImode and DImode */
  {12, 12},                             /* cost of storing MMX registers
                                           in SImode and DImode */
  6,                                    /* cost of moving SSE register */
  {12, 12, 12},                         /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {12, 12, 12},                         /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  8,                                    /* MMX or SSE register to integer */
  128,                                  /* size of prefetch block */
  8,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  6,                                    /* cost of FADD and FSUB insns.  */
  8,                                    /* cost of FMUL instruction.  */
  40,                                   /* cost of FDIV instruction.  */
  3,                                    /* cost of FABS instruction.  */
  3,                                    /* cost of FCHS instruction.  */
  44,                                   /* cost of FSQRT instruction.  */
};
 
const struct processor_costs *ix86_cost = &pentium_cost;
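/* Note: this default is only a build-time placeholder; override_options
   (later in this file) is expected to re-point ix86_cost at the cost
   table matching the selected -mtune processor.  */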
 
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6  (1<<PROCESSOR_K6)
#define m_ATHLON  (1<<PROCESSOR_ATHLON)
#define m_PENT4  (1<<PROCESSOR_PENTIUM4)
#define m_K8  (1<<PROCESSOR_K8)
#define m_ATHLON_K8  (m_K8 | m_ATHLON)
#define m_NOCONA  (1<<PROCESSOR_NOCONA)
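/* Consumption sketch: i386.h tests these masks against the current
   tuning target with macros along the lines of
     #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)
   where TUNEMASK is (1 << ix86_tune).  */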
 
const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_fisttp = m_NOCONA;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
/* Branch hints were put in P4 based on simulation results, but after P4
   was made no performance benefit was observed with branch hints; they
   also increase code size.  As a result, icc never generates branch
   hints.  */
const int x86_branch_hints = 0;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
const int x86_decompose_lea = m_PENT4 | m_NOCONA;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just the
   lower part of scalar values in the proper format, leaving the upper
   part undefined.  */
const int x86_sse_split_regs = m_ATHLON_K8;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;

/* ??? Allowing interunit moves makes it all too easy for the compiler to
   put integer data in xmm registers, which results in pretty abysmal
   code.  */
const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;

const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
/* Some CPU cores are not able to predict more than 4 branch instructions
   in the 16-byte window.  */
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
const int x86_use_bt = m_ATHLON_K8;
/* Compare and exchange was added for 80486.  */
const int x86_cmpxchg = ~m_386;
/* Exchange and add was added for 80486.  */
const int x86_xadd = ~m_386;

/* If the average insn count for a single function invocation is lower
   than this constant, emit fast (but longer) prologue and epilogue
   code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
 
/* Names for the 8-bit (low), 8-bit (high), and 16-bit registers,
   respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
 
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
 
/* The "default" register map used in 32-bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,               /* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,       /* fp regs */
  -1, -1, -1, -1, -1,                   /* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,       /* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,       /* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended SSE registers */
};
 
static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
 
/* The "default" register map used in 64-bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,               /* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,       /* fp regs */
  -1, -1, -1, -1, -1,                   /* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,       /* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,       /* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,         /* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,       /* extended SSE registers */
};
 
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
        0 for %eax (gcc regno = 0)
        1 for %ecx (gcc regno = 2)
        2 for %edx (gcc regno = 1)
        3 for %ebx (gcc regno = 3)
        4 for %esp (gcc regno = 7)
        5 for %ebp (gcc regno = 6)
        6 for %esi (gcc regno = 4)
        7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
        8  for %eip    (no gcc equivalent)
        9  for %eflags (gcc regno = 17)
        10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
        11 for %st(0) (gcc regno = 8)
        12 for %st(1) (gcc regno = 9)
        13 for %st(2) (gcc regno = 10)
        14 for %st(3) (gcc regno = 11)
        15 for %st(4) (gcc regno = 12)
        16 for %st(5) (gcc regno = 13)
        17 for %st(6) (gcc regno = 14)
        18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,               /* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,       /* fp regs */
  -1, 9, -1, -1, -1,                    /* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,       /* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,       /* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended SSE registers */
};
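/* Selection sketch: i386.h picks between these maps per target, roughly
     #define DBX_REGISTER_NUMBER(n) \
       (TARGET_64BIT ? dbx64_register_map[n] : dbx_register_map[n])
   with svr4_dbx_register_map substituted on SVR4-derived targets.  */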
 
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
rtx ix86_compare_emitted = NULL_RTX;
 
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
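/* Worked example: with the 64-bit values REGPARM_MAX == 6,
   SSE_REGPARM_MAX == 8 and UNITS_PER_WORD == 8, this is
   6*8 + 8*16 = 176 bytes, the register save area size specified
   by the psABI.  */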
 
/* Define the structure for the machine field in struct function.  */

struct stack_local_entry GTY(())
{
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};
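/* These entries form a per-function, GC-managed list of stack slots;
   assign_386_stack_local (later in this file) searches the list before
   allocating a new slot.  */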
 
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
                                              <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
                                              <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
                        )
   [va_arg registers]  (
                        > to_allocate         <- FRAME_POINTER
   [frame]             (
                        )
   [padding2]          /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
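/* All fields are computed by ix86_compute_frame_layout (declared below)
   whenever the prologue/epilogue code needs the frame shape.  */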
 
/* Code model option.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect.  */
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
 
/* Which unit we are generating floating-point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which CPU we are scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if the SSE prefetch instruction is not a NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
static int ix86_regparm;

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;

/* Variables which are this size or smaller are put in the data/bss
   or ldata/lbss sections.  */

int ix86_section_threshold = 65536;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
 
static bool ix86_handle_option (size_t, const char *, int);
static void output_pic_addr_const (FILE *, rtx, int);
static void put_condition_code (enum rtx_code, enum machine_mode,
                                int, int, FILE *);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
                                                   rtx *);
static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
                                                   enum machine_mode);
static rtx get_thread_pointer (int);
static rtx legitimize_tls_address (rtx, enum tls_model, int);
static void get_pc_thunk_name (char [32], unsigned int);
static rtx gen_push (rtx);
static int ix86_flags_dependant (rtx, rtx, enum attr_type);
static int ix86_agi_dependant (rtx, rtx, enum attr_type);
static struct machine_function * ix86_init_machine_status (void);
static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
static int ix86_nsaved_regs (void);
static void ix86_emit_save_regs (void);
static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
static HOST_WIDE_INT ix86_GOT_alias_set (void);
static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
static rtx ix86_expand_aligntest (rtx, int);
static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
static int ix86_issue_rate (void);
static int ix86_adjust_cost (rtx, rtx, rtx, int);
static int ia32_multipass_dfa_lookahead (void);
static void ix86_init_mmx_sse_builtins (void);
static rtx x86_this_parameter (tree);
static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                 HOST_WIDE_INT, tree);
static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void x86_file_start (void);
static void ix86_reorg (void);
static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
static tree ix86_build_builtin_va_list (void);
static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
                                         tree, int *, int);
static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
static bool ix86_vector_mode_supported_p (enum machine_mode);

static int ix86_address_cost (rtx);
static bool ix86_cannot_force_const_mem (rtx);
static rtx ix86_delegitimize_address (rtx);

static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;

struct builtin_description;
static rtx ix86_expand_sse_comi (const struct builtin_description *,
                                 tree, rtx);
static rtx ix86_expand_sse_compare (const struct builtin_description *,
                                    tree, rtx);
static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_store_builtin (enum insn_code, tree);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
static int ix86_fp_comparison_cost (enum rtx_code code);
static unsigned int ix86_select_alt_pic_regnum (void);
static int ix86_save_reg (unsigned int, int);
static void ix86_compute_frame_layout (struct ix86_frame *);
static int ix86_comp_type_attributes (tree, tree);
static int ix86_function_regparm (tree, tree);
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall (tree, tree);
static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
static int ix86_value_regno (enum machine_mode, tree, tree);
static bool contains_128bit_aligned_vector_p (tree);
static rtx ix86_struct_value_rtx (tree, int);
static bool ix86_ms_bitfield_layout_p (tree);
static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
static int extended_reg_mentioned_1 (rtx *, void *);
static bool ix86_rtx_costs (rtx, int, int, int *);
static int min_insn_size (rtx);
static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
                                    tree, bool);
static void ix86_init_builtins (void);
static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static const char *ix86_mangle_fundamental_type (tree);
static tree ix86_stack_protect_fail (void);
static rtx ix86_internal_arg_pointer (void);
static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);

/* This function is only used on Solaris.  */
static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
  ATTRIBUTE_UNUSED;
 
/* Register class used for passing a given 64-bit part of the argument.
   These represent classes as documented by the psABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class:
   gcc just uses SFmode or DFmode moves instead of DImode moves to avoid
   reformatting penalties.

   Similarly we play games with the INTEGERSI_CLASS to use cheaper SImode
   moves whenever possible (the upper half then contains padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] = {
  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
  "sseup", "x87", "x87up", "cplx87", "no"
};

#define MAX_CLASSES 4
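/* classify_argument (later in this file) fills an array of up to
   MAX_CLASSES such classes, one per eightbyte of the argument being
   classified; arguments it cannot classify are passed in memory.  */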
 
/* Table of constants used by fldpi, fldln2, etc.  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;
static void init_ext_80387_constants (void);
static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
static void x86_64_elf_select_section (tree decl, int reloc,
                                       unsigned HOST_WIDE_INT align)
                                      ATTRIBUTE_UNUSED;
 
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS     \
  (TARGET_DEFAULT                       \
   | TARGET_64BIT_DEFAULT               \
   | TARGET_SUBTARGET_DEFAULT           \
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)

#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_FUNDAMENTAL_TYPE
#define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

struct gcc_target targetm = TARGET_INITIALIZER;
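
/* Note (added): every TARGET_* macro redefined above overrides the
   corresponding default hook in TARGET_INITIALIZER; the language-independent
   parts of the compiler reach i386-specific behavior only through this
   targetm vector.  */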


/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif

/* Implement TARGET_HANDLE_OPTION.  */

static bool
ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
{
  switch (code)
    {
    case OPT_m3dnow:
      if (!value)
        {
          target_flags &= ~MASK_3DNOW_A;
          target_flags_explicit |= MASK_3DNOW_A;
        }
      return true;

    case OPT_mmmx:
      if (!value)
        {
          target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
          target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
        }
      return true;

    case OPT_msse:
      if (!value)
        {
          target_flags &= ~(MASK_SSE2 | MASK_SSE3);
          target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
        }
      return true;

    case OPT_msse2:
      if (!value)
        {
          target_flags &= ~MASK_SSE3;
          target_flags_explicit |= MASK_SSE3;
        }
      return true;

    default:
      return true;
    }
}
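
/* Note (added): each case above implements the convention that disabling an
   ISA extension also disables everything built on top of it (e.g. -mno-sse
   clears SSE2 and SSE3, -mno-mmx clears 3DNow!), while recording the mask in
   target_flags_explicit so that override_options will not re-enable it.  */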

/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options (void)
{
  int i;
  int ix86_tune_defaulted = 0;

  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;       /* Processor costs */
      const int target_enable;                  /* Target flags to enable.  */
      const int target_disable;                 /* Target flags to disable.  */
      const int align_loop;                     /* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
      {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
      {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
      {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
    };
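
  /* Note (added): the rows above are indexed by enum processor_type
     (PROCESSOR_I386 through PROCESSOR_NOCONA), so their order must match
     that enum; processor_target_table[ix86_tune] below depends on it.  */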

  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  static struct pta
    {
      const char *const name;           /* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
        {
          PTA_SSE = 1,
          PTA_SSE2 = 2,
          PTA_SSE3 = 4,
          PTA_MMX = 8,
          PTA_PREFETCH_SSE = 16,
          PTA_3DNOW = 32,
          PTA_3DNOW_A = 64,
          PTA_64BIT = 128
        } flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
                                       | PTA_MMX | PTA_PREFETCH_SSE},
      {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
                                        | PTA_MMX | PTA_PREFETCH_SSE},
      {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
                                        | PTA_MMX | PTA_PREFETCH_SSE},
      {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
                                        | PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
                                   | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
                                         | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
                                    | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
                                      | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
                                      | PTA_3DNOW_A | PTA_SSE},
      {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
                               | PTA_SSE | PTA_SSE2 },
      {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
                                      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
                                      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
                                      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
                                      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overridden by command line options.  */
  if (TARGET_64BIT)
    {
      if (flag_omit_frame_pointer == 2)
        flag_omit_frame_pointer = 1;
      if (flag_asynchronous_unwind_tables == 2)
        flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
        flag_pcc_struct_return = 0;
    }
  else
    {
      if (flag_omit_frame_pointer == 2)
        flag_omit_frame_pointer = 0;
      if (flag_asynchronous_unwind_tables == 2)
        flag_asynchronous_unwind_tables = 0;
      if (flag_pcc_struct_return == 2)
        flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }

  if (!ix86_tune_string && ix86_arch_string)
    ix86_tune_string = ix86_arch_string;
  if (!ix86_tune_string)
    {
      ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
      ix86_tune_defaulted = 1;
    }
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
        ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (!strcmp (ix86_cmodel_string, "medium"))
        ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
      else if (flag_pic)
        sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
        ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
        ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
        ix86_cmodel = CM_LARGE;
      else
        error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
        ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (! TARGET_MACHO
          && !strcmp (ix86_asm_string, "intel"))
        ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
        ix86_asm_dialect = ASM_ATT;
      else
        error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model %qs not supported in the %s bit mode",
           ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model %<large%> not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
           (target_flags & MASK_64BIT) ? 64 : 32);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
        ix86_arch = processor_alias_table[i].processor;
        /* Default cpu tuning to the architecture.  */
        ix86_tune = ix86_arch;
        if (processor_alias_table[i].flags & PTA_MMX
            && !(target_flags_explicit & MASK_MMX))
          target_flags |= MASK_MMX;
        if (processor_alias_table[i].flags & PTA_3DNOW
            && !(target_flags_explicit & MASK_3DNOW))
          target_flags |= MASK_3DNOW;
        if (processor_alias_table[i].flags & PTA_3DNOW_A
            && !(target_flags_explicit & MASK_3DNOW_A))
          target_flags |= MASK_3DNOW_A;
        if (processor_alias_table[i].flags & PTA_SSE
            && !(target_flags_explicit & MASK_SSE))
          target_flags |= MASK_SSE;
        if (processor_alias_table[i].flags & PTA_SSE2
            && !(target_flags_explicit & MASK_SSE2))
          target_flags |= MASK_SSE2;
        if (processor_alias_table[i].flags & PTA_SSE3
            && !(target_flags_explicit & MASK_SSE3))
          target_flags |= MASK_SSE3;
        if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
          x86_prefetch_sse = true;
        if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
          error ("CPU you selected does not support x86-64 "
                 "instruction set");
        break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
        ix86_tune = processor_alias_table[i].processor;
        if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
          {
            if (ix86_tune_defaulted)
              {
                ix86_tune_string = "x86-64";
                for (i = 0; i < pta_size; i++)
                  if (! strcmp (ix86_tune_string,
                                processor_alias_table[i].name))
                    break;
                ix86_tune = processor_alias_table[i].processor;
              }
            else
              error ("CPU you selected does not support x86-64 "
                     "instruction set");
          }
        /* Intel CPUs have always interpreted SSE prefetch instructions as
           NOPs; so, we can enable SSE prefetch instructions even when
           -mtune (rather than -march) points us to a processor that has them.
           However, the VIA C3 gives a SIGILL, so we only do that for i686 and
           higher processors.  */
        if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
          x86_prefetch_sse = true;
        break;
      }
  if (i == pta_size)
    error ("bad value (%s) for -mtune= switch", ix86_tune_string);

  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;
  target_flags |= processor_target_table[ix86_tune].target_enable;
  target_flags &= ~processor_target_table[ix86_tune].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
        error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
        ix86_regparm = i;
    }
  else if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;

  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning (0, "-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
        {
          i = atoi (ix86_align_loops_string);
          if (i < 0 || i > MAX_CODE_ALIGN)
            error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
          else
            align_loops = 1 << i;
        }
    }

  if (ix86_align_jumps_string)
    {
      warning (0, "-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
        {
          i = atoi (ix86_align_jumps_string);
          if (i < 0 || i > MAX_CODE_ALIGN)
            error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
          else
            align_jumps = 1 << i;
        }
    }

  if (ix86_align_funcs_string)
    {
      warning (0, "-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
        {
          i = atoi (ix86_align_funcs_string);
          if (i < 0 || i > MAX_CODE_ALIGN)
            error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
          else
            align_functions = 1 << i;
        }
    }

  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    align_functions = processor_target_table[ix86_tune].align_func;

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
                                   ? TARGET_64BIT ? 128 : 32
                                   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
        error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
               TARGET_64BIT ? 4 : 2);
      else
        ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }
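
  /* Worked example (added): -mpreferred-stack-boundary=4 yields
     (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the 16-byte
     alignment that the SSE __m128 type wants.  */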

  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
        error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
        ix86_branch_cost = i;
    }
  if (ix86_section_threshold_string)
    {
      i = atoi (ix86_section_threshold_string);
      if (i < 0)
        error ("-mlarge-data-threshold=%d is negative", i);
      else
        ix86_section_threshold = i;
    }

  if (ix86_tls_dialect_string)
    {
      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
        ix86_tls_dialect = TLS_DIALECT_GNU;
      else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
        ix86_tls_dialect = TLS_DIALECT_SUN;
      else
        error ("bad value (%s) for -mtls-dialect= switch",
               ix86_tls_dialect_string);
    }

  /* Keep nonleaf frame pointers.  */
  if (flag_omit_frame_pointer)
    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_unsafe_math_optimizations)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  /* Likewise, if the target doesn't have a 387, or we've specified
     software floating point, don't use 387 inline intrinsics.  */
  if (!TARGET_80387)
    target_flags |= MASK_NO_FANCY_MATH_387;

  /* Turn on SSE2 builtins for -msse3.  */
  if (TARGET_SSE3)
    target_flags |= MASK_SSE2;

  /* Turn on SSE builtins for -msse2.  */
  if (TARGET_SSE2)
    target_flags |= MASK_SSE;

  /* Turn on MMX builtins for -msse.  */
  if (TARGET_SSE)
    {
      target_flags |= MASK_MMX & ~target_flags_explicit;
      x86_prefetch_sse = true;
    }

  /* Turn on MMX builtins for 3Dnow.  */
  if (TARGET_3DNOW)
    target_flags |= MASK_MMX;
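
  /* Note (added): the four blocks above chain the implications
     SSE3 => SSE2 => SSE => MMX and 3DNow! => MMX, the enabling
     counterpart of the disabling logic in ix86_handle_option.  */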

  if (TARGET_64BIT)
    {
      if (TARGET_ALIGN_DOUBLE)
        error ("-malign-double makes no sense in the 64bit mode");
      if (TARGET_RTD)
        error ("-mrtd calling convention not supported in the 64bit mode");

      /* Enable by default the SSE and MMX builtins.  Do allow the user to
         explicitly disable any of these.  In particular, disabling SSE and
         MMX for kernel code is extremely useful.  */
      target_flags
        |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
            & ~target_flags_explicit);
    }
  else
    {
      /* The i386 ABI does not specify a red zone.  It still makes sense to
         use one when the programmer takes care to keep the stack from being
         clobbered.  */
      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
        target_flags |= MASK_NO_RED_ZONE;
    }

  /* Accept -msseregparm only if at least SSE support is enabled.  */
  if (TARGET_SSEREGPARM
      && ! TARGET_SSE)
    error ("-msseregparm used without SSE enabled");

  ix86_fpmath = TARGET_FPMATH_DEFAULT;

  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
        ix86_fpmath = FPMATH_387;
      else if (! strcmp (ix86_fpmath_string, "sse"))
        {
          if (!TARGET_SSE)
            {
              warning (0, "SSE instruction set disabled, using 387 arithmetics");
              ix86_fpmath = FPMATH_387;
            }
          else
            ix86_fpmath = FPMATH_SSE;
        }
      else if (! strcmp (ix86_fpmath_string, "387,sse")
               || ! strcmp (ix86_fpmath_string, "sse,387"))
        {
          if (!TARGET_SSE)
            {
              warning (0, "SSE instruction set disabled, using 387 arithmetics");
              ix86_fpmath = FPMATH_387;
            }
          else if (!TARGET_80387)
            {
              warning (0, "387 instruction set disabled, using SSE arithmetics");
              ix86_fpmath = FPMATH_SSE;
            }
          else
            ix86_fpmath = FPMATH_SSE | FPMATH_387;
        }
      else
        error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
    }

  /* If the i387 is disabled, then do not return values in it.  */
  if (!TARGET_80387)
    target_flags &= ~MASK_FLOAT_RETURNS;

  if ((x86_accumulate_outgoing_args & TUNEMASK)
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* ??? Unwind info is not correct around the CFG unless either a frame
     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
     unwind info generation to be aware of the CFG and propagating states
     around edges.  */
  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
       || flag_exceptions || flag_non_call_exceptions)
      && flag_omit_frame_pointer
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
        warning (0, "unwind tables currently require either a frame pointer "
                 "or -maccumulate-outgoing-args for correctness");
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }

  /* When no scheduling description is available, disable the scheduler
     passes so they won't slow down compilation and make x87 code slower.  */
  if (!TARGET_SCHEDULE)
    flag_schedule_insns_after_reload = flag_schedule_insns = 0;
}

/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  */

static void
x86_64_elf_select_section (tree decl, int reloc,
                           unsigned HOST_WIDE_INT align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE;
      switch (categorize_decl_for_section (decl, reloc, flag_pic))
        {
        case SECCAT_DATA:
          sname = ".ldata";
          break;
        case SECCAT_DATA_REL:
          sname = ".ldata.rel";
          break;
        case SECCAT_DATA_REL_LOCAL:
          sname = ".ldata.rel.local";
          break;
        case SECCAT_DATA_REL_RO:
          sname = ".ldata.rel.ro";
          break;
        case SECCAT_DATA_REL_RO_LOCAL:
          sname = ".ldata.rel.ro.local";
          break;
        case SECCAT_BSS:
          sname = ".lbss";
          flags |= SECTION_BSS;
          break;
        case SECCAT_RODATA:
        case SECCAT_RODATA_MERGE_STR:
        case SECCAT_RODATA_MERGE_STR_INIT:
        case SECCAT_RODATA_MERGE_CONST:
          sname = ".lrodata";
          flags = 0;
          break;
        case SECCAT_SRODATA:
        case SECCAT_SDATA:
        case SECCAT_SBSS:
          gcc_unreachable ();
        case SECCAT_TEXT:
        case SECCAT_TDATA:
        case SECCAT_TBSS:
          /* We don't split these for the medium model.  Place them into
             default sections and hope for the best.  */
          break;
        }
      if (sname)
        {
          /* We might get called with string constants, but named_section
             doesn't like them as they are not DECLs.  Also, we need to set
             flags in that case.  */
          if (!DECL_P (decl))
            named_section_flags (sname, flags);
          else
            named_section (decl, sname, reloc);
          return;
        }
    }
  default_elf_select_section (decl, reloc, align);
}

/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */

static void
x86_64_elf_unique_section (tree decl, int reloc)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc, flag_pic))
        {
        case SECCAT_DATA:
        case SECCAT_DATA_REL:
        case SECCAT_DATA_REL_LOCAL:
        case SECCAT_DATA_REL_RO:
        case SECCAT_DATA_REL_RO_LOCAL:
          prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
          break;
        case SECCAT_BSS:
          prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
          break;
        case SECCAT_RODATA:
        case SECCAT_RODATA_MERGE_STR:
        case SECCAT_RODATA_MERGE_STR_INIT:
        case SECCAT_RODATA_MERGE_CONST:
          prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
          break;
        case SECCAT_SRODATA:
        case SECCAT_SDATA:
        case SECCAT_SBSS:
          gcc_unreachable ();
        case SECCAT_TEXT:
        case SECCAT_TDATA:
        case SECCAT_TBSS:
          /* We don't split these for the medium model.  Place them into
             default sections and hope for the best.  */
          break;
        }
      if (prefix)
        {
          const char *name;
          size_t nlen, plen;
          char *string;
          plen = strlen (prefix);

          name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
          name = targetm.strip_name_encoding (name);
          nlen = strlen (name);

          string = alloca (nlen + plen + 1);
          memcpy (string, prefix, plen);
          memcpy (string + plen, name, nlen + 1);

          DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
          return;
        }
    }
  default_unique_section (decl, reloc);
}
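
/* Example (added): for a large-model variable `foo' categorized as writable
   data, the code above produces the section name ".ldata.foo", or
   ".gnu.linkonce.ld.foo" when the declaration is one-only and the assembler
   lacks COMDAT group support.  */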

#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For the medium model on x86-64 we need to use the .largecomm directive
   for large objects.  */
void
x86_elf_aligned_common (FILE *file,
                        const char *name, unsigned HOST_WIDE_INT size,
                        int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fprintf (file, ".largecomm\t");
  else
    fprintf (file, "%s", COMMON_ASM_OP);
  assemble_name (file, name);
  fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
           size, align / BITS_PER_UNIT);
}

/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
                        const char *name, unsigned HOST_WIDE_INT size,
                        int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    named_section (decl, ".lbss", 0);
  else
    bss_section ();
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* The standard thing is just to output a label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
#endif

void
optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif

  if (TARGET_MACHO)
    /* The Darwin libraries never set errno, so we might as well
       avoid calling them when that's the only reason we would.  */
    flag_errno_math = 0;

  /* The default values of these switches depend on TARGET_64BIT,
     which is not known at this point.  Mark these values with 2 and
     let the user override them.  In case there is no command line
     option specifying them, we will set the defaults in
     override_options.  */
  if (optimize >= 1)
    flag_omit_frame_pointer = 2;
  flag_pcc_struct_return = 2;
  flag_asynchronous_unwind_tables = 2;
#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
  SUBTARGET_OPTIMIZATION_OPTIONS;
#endif
}

/* Table of valid machine attributes.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Cdecl attribute says the callee is a normal C declaration.  */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  { "ms_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL }
};
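
/* Usage example (added): source code selects these calling-convention
   attributes with the standard GCC syntax, e.g.

     int __attribute__((regparm(3))) f (int a, int b, int c);
     int __attribute__((fastcall)) g (int a, int b);

   which ix86_handle_cconv_attribute below validates.  */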

/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree func;
  rtx a, b;

  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live.  */
  if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
    return false;

  if (decl)
    func = decl;
  else
    {
      func = TREE_TYPE (TREE_OPERAND (exp, 0));
      if (POINTER_TYPE_P (func))
        func = TREE_TYPE (func);
    }

  /* Check that the return value locations are the same.  For example,
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), func, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
                           cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
        return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    ;
  else if (!rtx_equal_p (a, b))
    return false;

  /* If this call is indirect, we'll need to be able to use a call-clobbered
     register for the address of the target function.  Make sure that all
     such registers are not used for passing parameters.  */
  if (!decl && !TARGET_64BIT)
    {
      tree type;

      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = TREE_OPERAND (exp, 0);             /* pointer expression */
      type = TREE_TYPE (type);                  /* pointer type */
      type = TREE_TYPE (type);                  /* function type */

      if (ix86_function_regparm (type, NULL) >= 3)
        {
          /* ??? Need to count the actual number of registers to be used,
             not the possible number of registers.  Fix later.  */
          return false;
        }
    }

#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  /* Dllimport'd functions are also called indirectly.  */
  if (decl && DECL_DLLIMPORT_P (decl)
      && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
    return false;
#endif

  /* If we force-aligned the stack, then sibcalling would unalign the
     stack, which may break the called function.  */
  if (cfun->machine->force_align_arg_pointer)
    return false;

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}

/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
   calling convention attributes;
   arguments as in struct attribute_spec.handler.  */

static tree
ix86_handle_cconv_attribute (tree *node, tree name,
                             tree args,
                             int flags ATTRIBUTE_UNUSED,
                             bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qs attribute only applies to functions",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall.  */
  if (is_attribute_p ("regparm", name))
    {
      tree cst;

      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
        error ("fastcall and regparm attributes are not compatible");

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
        {
          warning (OPT_Wattributes,
                   "%qs attribute requires an integer constant argument",
                   IDENTIFIER_POINTER (name));
          *no_add_attrs = true;
        }
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
        {
          warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
                   IDENTIFIER_POINTER (name), REGPARM_MAX);
          *no_add_attrs = true;
        }

      return NULL_TREE;
    }

  if (TARGET_64BIT)
    {
      warning (OPT_Wattributes, "%qs attribute ignored",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
  if (is_attribute_p ("fastcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
        error ("fastcall and cdecl attributes are not compatible");
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
        error ("fastcall and stdcall attributes are not compatible");
      if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
        error ("fastcall and regparm attributes are not compatible");
    }

  /* Can combine stdcall with fastcall (redundant), regparm and
     sseregparm.  */
  else if (is_attribute_p ("stdcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
        error ("stdcall and cdecl attributes are not compatible");
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
        error ("stdcall and fastcall attributes are not compatible");
    }

  /* Can combine cdecl with regparm and sseregparm.  */
  else if (is_attribute_p ("cdecl", name))
    {
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
        error ("stdcall and cdecl attributes are not compatible");
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
        error ("fastcall and cdecl attributes are not compatible");
    }

  /* Can combine sseregparm with all attributes.  */

  return NULL_TREE;
}

/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (tree type1, tree type2)
{
  /* Check for mismatch of non-default calling convention.  */
  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched fastcall/regparm types.  */
  if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
       != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
      || (ix86_function_regparm (type1, NULL)
          != ix86_function_regparm (type2, NULL)))
    return 0;

  /* Check for mismatched sseregparm types.  */
  if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
      != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
    return 0;

  /* Check for mismatched return types (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;

  return 1;
}

/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling the function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (tree type, tree decl)
{
  tree attr;
  int regparm = ix86_regparm;
  bool user_convention = false;

  if (!TARGET_64BIT)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
        {
          regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
          user_convention = true;
        }

      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
        {
          regparm = 2;
          user_convention = true;
        }

      /* Use register calling convention for local functions when possible.  */
      if (!TARGET_64BIT && !user_convention && decl
          && flag_unit_at_a_time && !profile_flag)
        {
          struct cgraph_local_info *i = cgraph_local_info (decl);
          if (i && i->local)
            {
              int local_regparm, globals = 0, regno;

              /* Make sure no regparm register is taken by a global register
                 variable.  */
              for (local_regparm = 0; local_regparm < 3; local_regparm++)
                if (global_regs[local_regparm])
                  break;
              /* We can't use regparm(3) for nested functions as these use
                 the static chain pointer in the third argument.  */
              if (local_regparm == 3
                  && decl_function_context (decl)
                  && !DECL_NO_STATIC_CHAIN (decl))
                local_regparm = 2;
              /* Each global register variable increases register pressure,
                 so the more global reg vars there are, the less the regparm
                 optimization can be used, unless requested by the user
                 explicitly.  */
              for (regno = 0; regno < 6; regno++)
                if (global_regs[regno])
                  globals++;
              local_regparm
                = globals < local_regparm ? local_regparm - globals : 0;

              if (local_regparm > regparm)
                regparm = local_regparm;
            }
        }
    }
  return regparm;
}
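
/* Illustrative note (added): with -funit-at-a-time, a function whose address
   never escapes the unit (i->local above) is promoted to regparm 3, so its
   first three integer arguments travel in %eax, %edx and %ecx instead of on
   the stack; global register variables claim those registers back.  */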

/* Return 1 or 2, if we can pass up to 8 SFmode (1) and DFmode (2) arguments
   in SSE registers for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling the function indirectly
   or considering a libcall.  Otherwise return 0.  */

static int
ix86_function_sseregparm (tree type, tree decl)
{
  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type
          && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
        {
          if (decl)
            error ("Calling %qD with attribute sseregparm without "
                   "SSE/SSE2 enabled", decl);
          else
            error ("Calling %qT with attribute sseregparm without "
                   "SSE/SSE2 enabled", type);
          return 0;
        }

      return 2;
    }

  /* For local functions, pass SFmode (and DFmode for SSE2) arguments
     in SSE registers even for 32-bit mode and not just 3, but up to
     8 SSE arguments in registers.  */
  if (!TARGET_64BIT && decl
      && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
    {
      struct cgraph_local_info *i = cgraph_local_info (decl);
      if (i && i->local)
        return TARGET_SSE2 ? 2 : 1;
    }

  return 0;
}

/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
}

/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
    {
      /* Stdcall and fastcall functions will pop the stack if not
         variable args.  */
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
          || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
        rtd = 1;

      if (rtd
          && (TYPE_ARG_TYPES (funtype) == NULL_TREE
              || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
                  == void_type_node)))
        return size;
    }

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !TARGET_64BIT
      && !KEEP_AGGREGATE_RETURN_POINTER)
    {
      int nregs = ix86_function_regparm (funtype, fundecl);

      if (!nregs)
        return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
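
/* Worked example (added): for a stdcall function with the fixed prototype
   void f (int, int), SIZE is 8, so the function returns with `ret $8' and
   the caller pops nothing; for a cdecl function the result is 0 and the
   caller adjusts %esp itself.  */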

/* Argument support functions.  */

/* Return true when REGNO may be used to pass function parameters.  */
bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  if (!TARGET_64BIT)
    return (regno < REGPARM_MAX
            || (TARGET_MMX && MMX_REGNO_P (regno)
                && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
            || (TARGET_SSE && SSE_REGNO_P (regno)
                && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));

  if (TARGET_SSE && SSE_REGNO_P (regno)
      && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
    return true;
  /* RAX is used as a hidden argument to va_arg functions.  */
  if (!regno)
    return true;
  for (i = 0; i < REGPARM_MAX; i++)
    if (regno == x86_64_int_parameter_registers[i])
      return true;
  return false;
}

/* Return true if we do not know how to pass TYPE solely in registers.  */

static bool
ix86_must_pass_in_stack (enum machine_mode mode, tree type)
{
  if (must_pass_in_stack_var_size_or_pad (mode, type))
    return true;

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && mode == TImode
          && type && TREE_CODE (type) != VECTOR_TYPE);
}

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
                      tree fntype,      /* tree ptr for function decl */
                      rtx libname,      /* SYMBOL_REF of library name or 0 */
                      tree fndecl)
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
        fprintf (stderr, "fntype code = %s, ret code = %s",
                 tree_code_name[(int) TREE_CODE (fntype)],
                 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
        fprintf (stderr, "no fntype");

      if (libname)
        fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  if (TARGET_SSE)
    cum->sse_nregs = SSE_REGPARM_MAX;
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_sse = true;
  cum->warn_mmx = true;
  cum->maybe_vaarg = false;

  /* Use ecx and edx registers if function has fastcall attribute,
     else look for regparm information.  */
  if (fntype && !TARGET_64BIT)
    {
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
        {
          cum->nregs = 2;
          cum->fastcall = 1;
        }
      else
        cum->nregs = ix86_function_regparm (fntype, fndecl);
    }

  /* Set up the number of SSE registers used for passing SFmode
     and DFmode arguments.  Warn for mismatching ABI.  */
  cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);

  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_node' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers in 32-bit mode.  */

  if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
           param != 0; param = next_param)
        {
          next_param = TREE_CHAIN (param);
          if (next_param == 0 && TREE_VALUE (param) != void_type_node)
            {
              if (!TARGET_64BIT)
                {
                  cum->nregs = 0;
                  cum->sse_nregs = 0;
                  cum->mmx_nregs = 0;
                  cum->warn_sse = 0;
                  cum->warn_mmx = 0;
                  cum->fastcall = 0;
                  cum->float_in_sse = 0;
                }
              cum->maybe_vaarg = true;
            }
        }
    }
  if ((!fntype && !libname)
      || (fntype && !TYPE_ARG_TYPES (fntype)))
    cum->maybe_vaarg = true;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}
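
/* Example (illustrative): given the 32-bit declaration

       void __attribute__ ((fastcall)) f (int a, int b, int c);

   the code above sets cum->nregs to 2 and cum->fastcall to 1, so A is
   passed in ECX, B in EDX, and C on the stack.  A trailing ellipsis
   would instead zero all the register counts and force every argument
   onto the stack.  */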

/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector ISA extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.  */

static enum machine_mode
type_natural_mode (tree type)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16)
          /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
          && TYPE_VECTOR_SUBPARTS (type) > 1)
        {
          enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

          if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
            mode = MIN_MODE_VECTOR_FLOAT;
          else
            mode = MIN_MODE_VECTOR_INT;

          /* Get the mode which has this inner mode and number of units.  */
          for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
            if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
                && GET_MODE_INNER (mode) == innermode)
              return mode;

          gcc_unreachable ();
        }
    }

  return mode;
}
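
/* Example (illustrative): given

       typedef int v2si __attribute__ ((vector_size (8)));

   with MMX disabled, TYPE_MODE yields a non-vector mode because
   vector_mode_supported_p is false for V2SImode; type_natural_mode still
   returns V2SImode, so the ABI slot is chosen as if the vector ISA were
   available.  */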

/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */

static rtx
gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
                     unsigned int regno)
{
  rtx tmp;

  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
  else
    {
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    }

  return tmp;
}

/* x86-64 register passing implementation.  See the x86-64 ABI for details.
   The goal of this code is to classify each 8-byte chunk of the incoming
   argument by register class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
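
/* Example (illustrative): for union { int i; float f; } both members
   occupy the same 8-byte chunk and classify as X86_64_INTEGERSI_CLASS and
   X86_64_SSESF_CLASS respectively; rule #4 merges them to
   X86_64_INTEGERSI_CLASS, so the union travels in a general-purpose
   register.  */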

/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies the
   offset in bits modulo 256 to avoid overflow cases.

   See the x86-64 PS ABI for details.  */

static int
classify_argument (enum machine_mode mode, tree type,
                   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
{
  HOST_WIDE_INT bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Variable sized entities are always passed/returned in memory.  */
  if (bytes < 0)
    return 0;

  if (mode != VOIDmode
      && targetm.calls.must_pass_in_stack (mode, type))
    return 0;

  if (type && AGGREGATE_TYPE_P (type))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
      if (bytes > 16)
        return 0;

      for (i = 0; i < words; i++)
        classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
         signal the memory class, so handle it as a special case.  */
      if (!words)
        {
          classes[0] = X86_64_NO_CLASS;
          return 1;
        }

      /* Classify each field of record and merge classes.  */
      switch (TREE_CODE (type))
        {
        case RECORD_TYPE:
          /* For classes first merge in the field of the subclasses.  */
          if (TYPE_BINFO (type))
            {
              tree binfo, base_binfo;
              int basenum;

              for (binfo = TYPE_BINFO (type), basenum = 0;
                   BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
                {
                   int num;
                   int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
                   tree type = BINFO_TYPE (base_binfo);

                   num = classify_argument (TYPE_MODE (type),
                                            type, subclasses,
                                            (offset + bit_offset) % 256);
                   if (!num)
                     return 0;
                   for (i = 0; i < num; i++)
                     {
                       int pos = (offset + (bit_offset % 64)) / 8 / 8;
                       classes[i + pos] =
                         merge_classes (subclasses[i], classes[i + pos]);
                     }
                }
            }
          /* And now merge the fields of structure.  */
          for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
            {
              if (TREE_CODE (field) == FIELD_DECL)
                {
                  int num;

                  if (TREE_TYPE (field) == error_mark_node)
                    continue;

                  /* Bitfields are always classified as integer.  Handle them
                     early, since later code would consider them to be
                     misaligned integers.  */
                  if (DECL_BIT_FIELD (field))
                    {
                      for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
                           i < ((int_bit_position (field) + (bit_offset % 64))
                                + tree_low_cst (DECL_SIZE (field), 0)
                                + 63) / 8 / 8; i++)
                        classes[i] =
                          merge_classes (X86_64_INTEGER_CLASS,
                                         classes[i]);
                    }
                  else
                    {
                      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
                                               TREE_TYPE (field), subclasses,
                                               (int_bit_position (field)
                                                + bit_offset) % 256);
                      if (!num)
                        return 0;
                      for (i = 0; i < num; i++)
                        {
                          int pos =
                            (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
                          classes[i + pos] =
                            merge_classes (subclasses[i], classes[i + pos]);
                        }
                    }
                }
            }
          break;

        case ARRAY_TYPE:
          /* Arrays are handled as small records.  */
          {
            int num;
            num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
                                     TREE_TYPE (type), subclasses, bit_offset);
            if (!num)
              return 0;

            /* The partial classes are now full classes.  */
            if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
              subclasses[0] = X86_64_SSE_CLASS;
            if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
              subclasses[0] = X86_64_INTEGER_CLASS;

            for (i = 0; i < words; i++)
              classes[i] = subclasses[i % num];

            break;
          }
        case UNION_TYPE:
        case QUAL_UNION_TYPE:
          /* Unions are similar to RECORD_TYPE but the offset is always 0.  */

          /* Unions are not derived.  */
          gcc_assert (!TYPE_BINFO (type)
                      || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
          for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
            {
              if (TREE_CODE (field) == FIELD_DECL)
                {
                  int num;
                  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
                                           TREE_TYPE (field), subclasses,
                                           bit_offset);
                  if (!num)
                    return 0;
                  for (i = 0; i < num; i++)
                    classes[i] = merge_classes (subclasses[i], classes[i]);
                }
            }
          break;

        default:
          gcc_unreachable ();
        }

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
        {
          /* If one class is MEMORY, everything should be passed in
             memory.  */
          if (classes[i] == X86_64_MEMORY_CLASS)
            return 0;

          /* The X86_64_SSEUP_CLASS should be always preceded by
             X86_64_SSE_CLASS.  */
          if (classes[i] == X86_64_SSEUP_CLASS
              && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
            classes[i] = X86_64_SSE_CLASS;

          /*  X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
          if (classes[i] == X86_64_X87UP_CLASS
              && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
            classes[i] = X86_64_SSE_CLASS;
        }
      return words;
    }

  /* Compute alignment needed.  We align all types to natural boundaries with
     the exception of XFmode, which is aligned to 128 bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
        mode_alignment = 128;
      else if (mode == XCmode)
        mode_alignment = 256;
      if (COMPLEX_MODE_P (mode))
        mode_alignment /= 2;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
        return 0;
    }

  /* For V1xx modes, just use the base mode.  */
  if (VECTOR_MODE_P (mode)
      && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
    mode = GET_MODE_INNER (mode);

  /* Classification of atomic types.  */
  switch (mode)
    {
    case DImode:
    case SImode:
    case HImode:
    case QImode:
    case CSImode:
    case CHImode:
    case CQImode:
      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
        classes[0] = X86_64_INTEGERSI_CLASS;
      else
        classes[0] = X86_64_INTEGER_CLASS;
      return 1;
    case CDImode:
    case TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case CTImode:
      return 0;
    case SFmode:
      if (!(bit_offset % 64))
        classes[0] = X86_64_SSESF_CLASS;
      else
        classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case XFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case TFmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case SCmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case XCmode:
      classes[0] = X86_64_COMPLEX_X87_CLASS;
      return 1;
    case TCmode:
      /* This mode is larger than 16 bytes.  */
      return 0;
    case V4SFmode:
    case V4SImode:
    case V16QImode:
    case V8HImode:
    case V2DFmode:
    case V2DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case V2SFmode:
    case V2SImode:
    case V4HImode:
    case V8QImode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case BLKmode:
    case VOIDmode:
      return 0;
    default:
      gcc_assert (VECTOR_MODE_P (mode));

      if (bytes > 16)
        return 0;

      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);

      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
        classes[0] = X86_64_INTEGERSI_CLASS;
      else
        classes[0] = X86_64_INTEGER_CLASS;
      classes[1] = X86_64_INTEGER_CLASS;
      return 1 + (bytes > 8);
    }
}
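
/* Example (illustrative): struct { long l; double d; } occupies two
   8-byte chunks; classify_argument returns 2 with classes[0] ==
   X86_64_INTEGER_CLASS and classes[1] == X86_64_SSEDF_CLASS, so the
   struct is passed in one general-purpose and one SSE register.  */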

/* Examine the argument and return the number of registers required in each
   register class.  Return 0 iff the parameter should be passed in memory.  */
static int
examine_argument (enum machine_mode mode, tree type, int in_return,
                  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class class[MAX_CLASSES];
  int n = classify_argument (mode, type, class, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  if (!n)
    return 0;
  for (n--; n >= 0; n--)
    switch (class[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
        (*int_nregs)++;
        break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
        (*sse_nregs)++;
        break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
        break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
        if (!in_return)
          return 0;
        break;
      case X86_64_COMPLEX_X87_CLASS:
        return in_return ? 2 : 0;
      case X86_64_MEMORY_CLASS:
        gcc_unreachable ();
      }
  return 1;
}
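
/* Example (illustrative): a long double classifies as X86_64_X87_CLASS
   plus X86_64_X87UP_CLASS, so examine_argument reports "pass in memory"
   (0) when IN_RETURN is zero, yet accepts the value when IN_RETURN is
   nonzero, since returning in %st(0) is fine.  */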

/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.  */

static rtx
construct_container (enum machine_mode mode, enum machine_mode orig_mode,
                     tree type, int in_return, int nintregs, int nsseregs,
                     const int *intreg, int sse_regno)
{
  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class class[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, class, 0);
  if (TARGET_DEBUG_ARG)
    {
      if (!n)
        fprintf (stderr, "Memory class\n");
      else
        {
          fprintf (stderr, "Classes:");
          for (i = 0; i < n; i++)
            {
              fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
            }
           fprintf (stderr, "\n");
        }
    }
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs,
                         &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs && !TARGET_SSE)
    {
      static bool issued_error;
      if (!issued_error)
        {
          issued_error = true;
          if (in_return)
            error ("SSE register return with SSE disabled");
          else
            error ("SSE register argument with SSE disabled");
        }
      return NULL;
    }

  /* First construct simple cases.  Avoid SCmode, since we want to use a
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (class[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
        return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
        return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
        return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
        /* Zero sized array, struct or class.  */
        return NULL;
      default:
        gcc_unreachable ();
      }
  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
  if (n == 2
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
      && class[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == TFmode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      switch (class[i])
        {
          case X86_64_NO_CLASS:
            break;
          case X86_64_INTEGER_CLASS:
          case X86_64_INTEGERSI_CLASS:
            /* Merge TImodes on aligned occasions here too.  */
            if (i * 8 + 8 > bytes)
              tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
            else if (class[i] == X86_64_INTEGERSI_CLASS)
              tmpmode = SImode;
            else
              tmpmode = DImode;
            /* We've requested a size for which there is no integer mode.
               Use DImode.  */
            if (tmpmode == BLKmode)
              tmpmode = DImode;
            exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
                                               gen_rtx_REG (tmpmode, *intreg),
                                               GEN_INT (i*8));
            intreg++;
            break;
          case X86_64_SSESF_CLASS:
            exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
                                               gen_rtx_REG (SFmode,
                                                            SSE_REGNO (sse_regno)),
                                               GEN_INT (i*8));
            sse_regno++;
            break;
          case X86_64_SSEDF_CLASS:
            exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
                                               gen_rtx_REG (DFmode,
                                                            SSE_REGNO (sse_regno)),
                                               GEN_INT (i*8));
            sse_regno++;
            break;
          case X86_64_SSE_CLASS:
            if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
              tmpmode = TImode;
            else
              tmpmode = DImode;
            exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
                                               gen_rtx_REG (tmpmode,
                                                            SSE_REGNO (sse_regno)),
                                               GEN_INT (i*8));
            if (tmpmode == TImode)
              i++;
            sse_regno++;
            break;
          default:
            gcc_unreachable ();
        }
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];
  return ret;
}
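
/* Example (illustrative): for struct { long l; double d; } the container
   built above is roughly

       (parallel:BLK [(expr_list (reg:DI rdi) (const_int 0))
                      (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. the first chunk at byte offset 0 in a general-purpose register
   and the second at byte offset 8 in an SSE register.  */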
3070
 
3071
/* Update the data in CUM to advance over an argument
3072
   of mode MODE and data type TYPE.
3073
   (TYPE is null for libcalls where that information may not be available.)  */
3074
 
3075
void
3076
function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3077
                      tree type, int named)
3078
{
3079
  int bytes =
3080
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3081
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3082
 
3083
  if (type)
3084
    mode = type_natural_mode (type);
3085
 
3086
  if (TARGET_DEBUG_ARG)
3087
    fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3088
             "mode=%s, named=%d)\n\n",
3089
             words, cum->words, cum->nregs, cum->sse_nregs,
3090
             GET_MODE_NAME (mode), named);
3091
 
3092
  if (TARGET_64BIT)
3093
    {
3094
      int int_nregs, sse_nregs;
3095
      if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3096
        cum->words += words;
3097
      else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3098
        {
3099
          cum->nregs -= int_nregs;
3100
          cum->sse_nregs -= sse_nregs;
3101
          cum->regno += int_nregs;
3102
          cum->sse_regno += sse_nregs;
3103
        }
3104
      else
3105
        cum->words += words;
3106
    }
3107
  else
3108
    {
3109
      switch (mode)
3110
        {
3111
        default:
3112
          break;
3113
 
3114
        case BLKmode:
3115
          if (bytes < 0)
3116
            break;
3117
          /* FALLTHRU */
3118
 
3119
        case DImode:
3120
        case SImode:
3121
        case HImode:
3122
        case QImode:
3123
          cum->words += words;
3124
          cum->nregs -= words;
3125
          cum->regno += words;
3126
 
3127
          if (cum->nregs <= 0)
3128
            {
3129
              cum->nregs = 0;
3130
              cum->regno = 0;
3131
            }
3132
          break;
3133
 
3134
        case DFmode:
3135
          if (cum->float_in_sse < 2)
3136
            break;
3137
        case SFmode:
3138
          if (cum->float_in_sse < 1)
3139
            break;
3140
          /* FALLTHRU */
3141
 
3142
        case TImode:
3143
        case V16QImode:
3144
        case V8HImode:
3145
        case V4SImode:
3146
        case V2DImode:
3147
        case V4SFmode:
3148
        case V2DFmode:
3149
          if (!type || !AGGREGATE_TYPE_P (type))
3150
            {
3151
              cum->sse_words += words;
3152
              cum->sse_nregs -= 1;
3153
              cum->sse_regno += 1;
3154
              if (cum->sse_nregs <= 0)
3155
                {
3156
                  cum->sse_nregs = 0;
3157
                  cum->sse_regno = 0;
3158
                }
3159
            }
3160
          break;
3161
 
3162
        case V8QImode:
3163
        case V4HImode:
3164
        case V2SImode:
3165
        case V2SFmode:
3166
          if (!type || !AGGREGATE_TYPE_P (type))
3167
            {
3168
              cum->mmx_words += words;
3169
              cum->mmx_nregs -= 1;
3170
              cum->mmx_regno += 1;
3171
              if (cum->mmx_nregs <= 0)
3172
                {
3173
                  cum->mmx_nregs = 0;
3174
                  cum->mmx_regno = 0;
3175
                }
3176
            }
3177
          break;
3178
        }
3179
    }
3180
}
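
/* Example (illustrative): advancing over f (int i, double d) on x86-64
   consumes one integer register for I (cum->nregs 6 -> 5, cum->regno
   0 -> 1) and one SSE register for D (cum->sse_nregs 8 -> 7); nothing
   spills to the stack, so cum->words stays 0.  */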

/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

rtx
function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
              tree type, int named)
{
  enum machine_mode mode = orig_mode;
  rtx ret = NULL_RTX;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  static bool warnedsse, warnedmmx;

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    mode = type_natural_mode (type);

  /* Handle a hidden AL argument containing the number of registers for
     varargs x86-64 functions.  For the i386 ABI just return constm1_rtx to
     avoid any AL settings.  */
  if (mode == VOIDmode)
    {
      if (TARGET_64BIT)
        return GEN_INT (cum->maybe_vaarg
                        ? (cum->sse_nregs < 0
                           ? SSE_REGPARM_MAX
                           : cum->sse_regno)
                        : -1);
      else
        return constm1_rtx;
    }
  if (TARGET_64BIT)
    ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
                               cum->sse_nregs,
                               &x86_64_int_parameter_registers [cum->regno],
                               cum->sse_regno);
  else
    switch (mode)
      {
        /* For now, pass fp/complex values on the stack.  */
      default:
        break;

      case BLKmode:
        if (bytes < 0)
          break;
        /* FALLTHRU */
      case DImode:
      case SImode:
      case HImode:
      case QImode:
        if (words <= cum->nregs)
          {
            int regno = cum->regno;

            /* Fastcall allocates the first two DWORD (SImode) or
               smaller arguments to ECX and EDX.  */
            if (cum->fastcall)
              {
                if (mode == BLKmode || mode == DImode)
                  break;

                /* ECX, not EAX, is the first allocated register.  */
                if (regno == 0)
                  regno = 2;
              }
            ret = gen_rtx_REG (mode, regno);
          }
        break;
      case DFmode:
        if (cum->float_in_sse < 2)
          break;
      case SFmode:
        if (cum->float_in_sse < 1)
          break;
        /* FALLTHRU */
      case TImode:
      case V16QImode:
      case V8HImode:
      case V4SImode:
      case V2DImode:
      case V4SFmode:
      case V2DFmode:
        if (!type || !AGGREGATE_TYPE_P (type))
          {
            if (!TARGET_SSE && !warnedsse && cum->warn_sse)
              {
                warnedsse = true;
                warning (0, "SSE vector argument without SSE enabled "
                         "changes the ABI");
              }
            if (cum->sse_nregs)
              ret = gen_reg_or_parallel (mode, orig_mode,
                                         cum->sse_regno + FIRST_SSE_REG);
          }
        break;
      case V8QImode:
      case V4HImode:
      case V2SImode:
      case V2SFmode:
        if (!type || !AGGREGATE_TYPE_P (type))
          {
            if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
              {
                warnedmmx = true;
                warning (0, "MMX vector argument without MMX enabled "
                         "changes the ABI");
              }
            if (cum->mmx_nregs)
              ret = gen_reg_or_parallel (mode, orig_mode,
                                         cum->mmx_regno + FIRST_MMX_REG);
          }
        break;
      }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
               "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
               words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
        print_simple_rtl (stderr, ret);
      else
        fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}
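
/* Example (illustrative): with -m32 -mno-sse, a 16-byte vector argument
   takes the V4SFmode path above, triggers the one-time "SSE vector
   argument without SSE enabled changes the ABI" warning, and leaves RET
   null, so the value ends up on the stack.  */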

/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

static bool
ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
                        enum machine_mode mode ATTRIBUTE_UNUSED,
                        tree type, bool named ATTRIBUTE_UNUSED)
{
  if (!TARGET_64BIT)
    return 0;

  if (type && int_size_in_bytes (type) == -1)
    {
      if (TARGET_DEBUG_ARG)
        fprintf (stderr, "function_arg_pass_by_reference\n");
      return 1;
    }

  return 0;
}

/* Return true when TYPE should be 128-bit aligned for the 32-bit argument
   passing ABI.  Only called if TARGET_SSE.  */
static bool
contains_128bit_aligned_vector_p (tree type)
{
  enum machine_mode mode = TYPE_MODE (type);
  if (SSE_REG_MODE_P (mode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
        {
        case RECORD_TYPE:
        case UNION_TYPE:
        case QUAL_UNION_TYPE:
          {
            tree field;

            if (TYPE_BINFO (type))
              {
                tree binfo, base_binfo;
                int i;

                for (binfo = TYPE_BINFO (type), i = 0;
                     BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
                  if (contains_128bit_aligned_vector_p
                      (BINFO_TYPE (base_binfo)))
                    return true;
              }
            /* And now check the fields of the structure.  */
            for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
              {
                if (TREE_CODE (field) == FIELD_DECL
                    && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
                  return true;
              }
            break;
          }

        case ARRAY_TYPE:
          /* Just in case some language passes arrays by value.  */
          if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
            return true;
          break;

        default:
          gcc_unreachable ();
        }
    }
  return false;
}

/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  */

int
ix86_function_arg_boundary (enum machine_mode mode, tree type)
{
  int align;
  if (type)
    align = TYPE_ALIGN (type);
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  if (!TARGET_64BIT)
    {
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
         make an exception for SSE modes since these require 128bit
         alignment.

         The handling here differs from field_alignment.  ICC aligns MMX
         arguments to 4 byte boundaries, while structure fields are aligned
         to 8 byte boundaries.  */
      if (!TARGET_SSE)
        align = PARM_BOUNDARY;
      else if (!type)
        {
          if (!SSE_REG_MODE_P (mode))
            align = PARM_BOUNDARY;
        }
      else
        {
          if (!contains_128bit_aligned_vector_p (type))
            align = PARM_BOUNDARY;
        }
    }
  if (align > 128)
    align = 128;
  return align;
}
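
/* Example (illustrative): in 32-bit mode a plain double argument reports
   only PARM_BOUNDARY (32 bits) here, whereas a 16-byte vector type such
   as one declared with __attribute__ ((vector_size (16))) reports the
   full 128-bit boundary.  */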

/* Return true if REGNO is a possible register number for a function
   value.  */
bool
ix86_function_value_regno_p (int regno)
{
  if (regno == 0
      || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
      || (regno == FIRST_SSE_REG && TARGET_SSE))
    return true;

  if (!TARGET_64BIT
      && (regno == FIRST_MMX_REG && TARGET_MMX))
    return true;

  return false;
}

/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */
rtx
ix86_function_value (tree valtype, tree fntype_or_decl,
                     bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode natmode = type_natural_mode (valtype);

  if (TARGET_64BIT)
    {
      rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
                                     1, REGPARM_MAX, SSE_REGPARM_MAX,
                                     x86_64_int_return_registers, 0);
      /* For zero sized structures, construct_container returns NULL, but we
         need to keep the rest of the compiler happy by returning a
         meaningful value.  */
      if (!ret)
        ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
      return ret;
    }
  else
    {
      tree fn = NULL_TREE, fntype;
      if (fntype_or_decl
          && DECL_P (fntype_or_decl))
        fn = fntype_or_decl;
      fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
      return gen_rtx_REG (TYPE_MODE (valtype),
                          ix86_value_regno (natmode, fn, fntype));
    }
}

/* Return nonzero iff TYPE is returned in memory.  */
int
ix86_return_in_memory (tree type)
{
  int needed_intregs, needed_sseregs, size;
  enum machine_mode mode = type_natural_mode (type);

  if (TARGET_64BIT)
    return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);

  if (mode == BLKmode)
    return 1;

  size = int_size_in_bytes (type);

  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
    return 0;

  if (VECTOR_MODE_P (mode) || mode == TImode)
    {
      /* User-created vectors small enough to fit in EAX.  */
      if (size < 8)
        return 0;

      /* MMX/3dNow values are returned in MM0,
         except when it doesn't exist.  */
      if (size == 8)
        return (TARGET_MMX ? 0 : 1);

      /* SSE values are returned in XMM0, except when it doesn't exist.  */
      if (size == 16)
        return (TARGET_SSE ? 0 : 1);
    }

  if (mode == XFmode)
    return 0;

  if (size > 12)
    return 1;
  return 0;
}
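
/* Example (illustrative): in 32-bit mode a long double (XFmode) passes
   the XFmode test above and is returned in %st(0), whereas a 16-byte
   BLKmode struct hits the BLKmode test and is returned in memory.  */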

/* When returning SSE vector types, we have a choice of either
     (1) being ABI incompatible with a -march switch, or
     (2) generating an error.
   Given no good solution, I think the safest thing is one warning.
   The user won't be able to use -Werror, but....

   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
   called in response to actually generating a caller or callee that
   uses such a type.  As opposed to RETURN_IN_MEMORY, which is called
   via aggregate_value_p for general type probing from tree-ssa.  */

static rtx
ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
{
  static bool warnedsse, warnedmmx;

  if (type)
    {
      /* Look at the return type of the function, not the function type.  */
      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));

      if (!TARGET_SSE && !warnedsse)
        {
          if (mode == TImode
              || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
            {
              warnedsse = true;
              warning (0, "SSE vector return without SSE enabled "
                       "changes the ABI");
            }
        }

      if (!TARGET_MMX && !warnedmmx)
        {
          if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
            {
              warnedmmx = true;
              warning (0, "MMX vector return without MMX enabled "
                       "changes the ABI");
            }
        }
    }

  return NULL;
}

/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */
rtx
ix86_libcall_value (enum machine_mode mode)
{
  if (TARGET_64BIT)
    {
      switch (mode)
        {
        case SFmode:
        case SCmode:
        case DFmode:
        case DCmode:
        case TFmode:
          return gen_rtx_REG (mode, FIRST_SSE_REG);
        case XFmode:
        case XCmode:
          return gen_rtx_REG (mode, FIRST_FLOAT_REG);
        case TCmode:
          return NULL;
        default:
          return gen_rtx_REG (mode, 0);
        }
    }
  else
    return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
}

/* Given a mode, return the register to use for a return value.  */

static int
ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
{
  gcc_assert (!TARGET_64BIT);

  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we prevent this case when MMX is not available.  */
  if ((VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8))
    return FIRST_MMX_REG;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when SSE is not available.  */
  if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    return FIRST_SSE_REG;

  /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values.  */
  if (GET_MODE_CLASS (mode) != MODE_FLOAT || !TARGET_FLOAT_RETURNS_IN_80387)
    return 0;

  /* Floating point return values in %st(0), except for local functions when
     SSE math is enabled or for functions with the sseregparm attribute.  */
  if ((func || fntype)
      && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (fntype, func);
      if ((sse_level >= 1 && mode == SFmode)
          || (sse_level == 2 && mode == DFmode))
        return FIRST_SSE_REG;
    }

  return FIRST_FLOAT_REG;
}
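
/* Example (illustrative): compiling 32-bit code with -mfpmath=sse, a
   local function returning float can get sse_level >= 1 from
   ix86_function_sseregparm above, so the value comes back in %xmm0
   rather than %st(0).  */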

/* Create the va_list data type.  */

static tree
ix86_build_builtin_va_list (void)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use a plain pointer to the argument area.  */
  if (!TARGET_64BIT)
    return build_pointer_type (char_type_node);

  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
                      unsigned_type_node);
  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
                      unsigned_type_node);
  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
                      ptr_type_node);
  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
                      ptr_type_node);

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TREE_CHAIN (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  TREE_CHAIN (f_gpr) = f_fpr;
  TREE_CHAIN (f_fpr) = f_ovf;
  TREE_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
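
/* Example (illustrative): the record built above corresponds to the
   familiar x86-64 ABI declaration

       typedef struct __va_list_tag {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } va_list[1];  */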

/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */

static void
ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
                             tree type, int *pretend_size ATTRIBUTE_UNUSED,
                             int no_rtl)
{
  CUMULATIVE_ARGS next_cum;
  rtx save_area = NULL_RTX, mem;
  rtx label;
  rtx label_ref;
  rtx tmp_reg;
  rtx nsse_reg;
  int set;
  tree fntype;
  int stdarg_p;
  int i;

  if (!TARGET_64BIT)
    return;

  if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
    return;

  /* Indicate that we need to allocate space on the stack for the varargs
     save area.  */
  ix86_save_varrargs_registers = 1;

  cfun->stack_alignment_needed = 128;

  fntype = TREE_TYPE (current_function_decl);
  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
              && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
                  != void_type_node));

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p)
    function_arg_advance (&next_cum, mode, type, 1);

  if (!no_rtl)
    save_area = frame_pointer_rtx;

  set = get_varargs_alias_set ();

  for (i = next_cum.regno;
       i < ix86_regparm
       && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
       i++)
    {
      mem = gen_rtx_MEM (Pmode,
                         plus_constant (save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem, gen_rtx_REG (Pmode,
                                        x86_64_int_parameter_registers[i]));
    }

  if (next_cum.sse_nregs && cfun->va_list_fpr_size)
    {
      /* Now emit code to save SSE registers.  The AX parameter contains the
         number of SSE parameter registers used to call this function.  We use
         the sse_prologue_save insn template, which produces a computed jump
         across the SSE saves.  We need some preparation work to get this
         working.  */

      label = gen_label_rtx ();
      label_ref = gen_rtx_LABEL_REF (Pmode, label);

      /* Compute the address to jump to:
         label - eax*4 + nnamed_sse_arguments*4  */
      tmp_reg = gen_reg_rtx (Pmode);
      nsse_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
                              gen_rtx_MULT (Pmode, nsse_reg,
                                            GEN_INT (4))));
      if (next_cum.sse_regno)
        emit_move_insn
          (nsse_reg,
           gen_rtx_CONST (DImode,
                          gen_rtx_PLUS (DImode,
                                        label_ref,
                                        GEN_INT (next_cum.sse_regno * 4))));
      else
        emit_move_insn (nsse_reg, label_ref);
      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));

      /* Compute the address of the memory block we save into.  We always use
         a pointer pointing 127 bytes after the first byte to store, which
         keeps the instruction size limited to 4 bytes.  */
      tmp_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
                              plus_constant (save_area,
                                             8 * REGPARM_MAX + 127)));
      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      set_mem_align (mem, BITS_PER_WORD);

      /* And finally do the dirty job!  */
      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
                                        GEN_INT (next_cum.sse_regno), label));
    }

}
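
/* Example (illustrative): the save area laid out above holds
   REGPARM_MAX * 8 bytes of general-purpose registers followed by
   SSE_REGPARM_MAX * 16 bytes of SSE registers; with the usual x86-64
   values (6 and 8) that is 48 + 128 = 176 bytes.  */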

/* Implement va_start.  */

void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;

  /* Only the 64-bit target needs something special.  */
  if (!TARGET_64BIT)
    {
      std_expand_builtin_va_start (valist, nextarg);
      return;
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = current_function_args_info.words;
  n_gpr = current_function_args_info.regno;
  n_fpr = current_function_args_info.sse_regno;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
             (int) words, (int) n_gpr, (int) n_fpr);

  if (cfun->va_list_gpr_size)
    {
      t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
                 build_int_cst (NULL_TREE, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (cfun->va_list_fpr_size)
    {
      t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
                 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
  if (words != 0)
    t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
               build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
    {
      /* Find the register save area.
         The function prologue saves it right above the stack frame.  */
      t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
      t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}
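
/* Example (illustrative): for f (int n, ...) the named int consumes one
   GP register, so va_start stores gp_offset = 8 and, with no named SSE
   arguments, fp_offset = 8 * REGPARM_MAX (48); overflow_arg_area points
   at the first stack-passed argument.  */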

/* Implement va_arg.  */

tree
ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  enum machine_mode nat_mode;

  /* Only the 64-bit target needs something special.  */
  if (!TARGET_64BIT)
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build_va_arg_indirect_ref (valist);
  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  nat_mode = type_natural_mode (type);
  container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
                                   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");
  DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label ();
      lab_over = create_artificial_label ();

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      need_temp = (!REG_P (container)
                   && ((needed_intregs && TYPE_ALIGN (type) > 64)
                       || TYPE_ALIGN (type) > 128));
3929
 
3930
      /* In case we are passing structure, verify that it is consecutive block
3931
         on the register save area.  If not we need to do moves.  */
3932
      if (!need_temp && !REG_P (container))
3933
        {
3934
          /* Verify that all registers are strictly consecutive  */
3935
          if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3936
            {
3937
              int i;
3938
 
3939
              for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3940
                {
3941
                  rtx slot = XVECEXP (container, 0, i);
3942
                  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3943
                      || INTVAL (XEXP (slot, 1)) != i * 16)
3944
                    need_temp = 1;
3945
                }
3946
            }
3947
          else
3948
            {
3949
              int i;
3950
 
3951
              for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3952
                {
3953
                  rtx slot = XVECEXP (container, 0, i);
3954
                  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3955
                      || INTVAL (XEXP (slot, 1)) != i * 8)
3956
                    need_temp = 1;
3957
                }
3958
            }
3959
        }
3960
      if (!need_temp)
3961
        {
3962
          int_addr = addr;
3963
          sse_addr = addr;
3964
        }
3965
      else
3966
        {
3967
          int_addr = create_tmp_var (ptr_type_node, "int_addr");
3968
          DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3969
          sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3970
          DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3971
        }
3972
 
3973
      /* First ensure that we fit completely in registers.  */
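      /* GPR and FPR count bytes into the register save area: integer
         registers occupy 8 bytes each, and SSE registers 16 bytes each
         starting at offset REGPARM_MAX * 8 -- hence the bounds tested
         below.  */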
      if (needed_intregs)
        {
          t = build_int_cst (TREE_TYPE (gpr),
                             (REGPARM_MAX - needed_intregs + 1) * 8);
          t = build2 (GE_EXPR, boolean_type_node, gpr, t);
          t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
          t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
          gimplify_and_add (t, pre_p);
        }
      if (needed_sseregs)
        {
          t = build_int_cst (TREE_TYPE (fpr),
                             (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
                             + REGPARM_MAX * 8);
          t = build2 (GE_EXPR, boolean_type_node, fpr, t);
          t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
          t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
          gimplify_and_add (t, pre_p);
        }
 
      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
        {
          /* int_addr = gpr + sav; */
          t = fold_convert (ptr_type_node, gpr);
          t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
          t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
          gimplify_and_add (t, pre_p);
        }
      if (needed_sseregs)
        {
          /* sse_addr = fpr + sav; */
          t = fold_convert (ptr_type_node, fpr);
          t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
          t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
          gimplify_and_add (t, pre_p);
        }
      if (need_temp)
        {
          int i;
          tree temp = create_tmp_var (type, "va_arg_tmp");
 
          /* addr = &temp; */
          t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
          t = build2 (MODIFY_EXPR, void_type_node, addr, t);
          gimplify_and_add (t, pre_p);
 
          for (i = 0; i < XVECLEN (container, 0); i++)
            {
              rtx slot = XVECEXP (container, 0, i);
              rtx reg = XEXP (slot, 0);
              enum machine_mode mode = GET_MODE (reg);
              tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
              tree addr_type = build_pointer_type (piece_type);
              tree src_addr, src;
              int src_offset;
              tree dest_addr, dest;
 
              if (SSE_REGNO_P (REGNO (reg)))
                {
                  src_addr = sse_addr;
                  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
                }
              else
                {
                  src_addr = int_addr;
                  src_offset = REGNO (reg) * 8;
                }
              src_addr = fold_convert (addr_type, src_addr);
              src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
                                       size_int (src_offset)));
              src = build_va_arg_indirect_ref (src_addr);
 
              dest_addr = fold_convert (addr_type, addr);
              dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
                                        size_int (INTVAL (XEXP (slot, 1)))));
              dest = build_va_arg_indirect_ref (dest_addr);
 
              t = build2 (MODIFY_EXPR, void_type_node, dest, src);
              gimplify_and_add (t, pre_p);
            }
        }
 
      if (needed_intregs)
        {
          t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
                      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
          t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
          gimplify_and_add (t, pre_p);
        }
      if (needed_sseregs)
        {
          t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
                      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
          t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
          gimplify_and_add (t, pre_p);
        }
 
      t = build1 (GOTO_EXPR, void_type_node, lab_over);
      gimplify_and_add (t, pre_p);
 
      t = build1 (LABEL_EXPR, void_type_node, lab_false);
      append_to_statement_list (t, pre_p);
    }
 
  /* ... otherwise out of the overflow area.  */
 
  /* Care for on-stack alignment if needed.  */
  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
      || integer_zerop (TYPE_SIZE (type)))
    t = ovf;
  else
    {
      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
      t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
                 build_int_cst (TREE_TYPE (ovf), align - 1));
      t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
                 build_int_cst (TREE_TYPE (t), -align));
    }
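  /* T is now OVF rounded up to the next multiple of ALIGN:
     (ovf + align - 1) & -align.  */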
  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
 
  t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
  gimplify_and_add (t2, pre_p);
 
  t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
              build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
  t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  gimplify_and_add (t, pre_p);
 
  if (container)
    {
      t = build1 (LABEL_EXPR, void_type_node, lab_over);
      append_to_statement_list (t, pre_p);
    }
 
  ptrtype = build_pointer_type (type);
  addr = fold_convert (ptrtype, addr);
 
  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
 
/* Return nonzero if OPNUM's MEM should be matched
   in movabs* patterns.  */
 
int
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;
 
  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  gcc_assert (GET_CODE (mem) == MEM);
  return (volatile_ok || !MEM_VOLATILE_P (mem));
}
 
/* Initialize the table of extra 80387 mathematical constants.  */
 
static void
init_ext_80387_constants (void)
{
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
  };
  int i;
 
  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
                    XFmode, &ext_80387_constants_table[i]);
    }
 
  ext_80387_constants_init = 1;
}
 
/* Return true if the constant is something that can be loaded with
   a special instruction.  */
 
int
standard_80387_constant_p (rtx x)
{
  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
    return -1;
 
  if (x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (x == CONST1_RTX (GET_MODE (x)))
    return 2;
 
  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (GET_MODE (x) == XFmode
      && (optimize_size || x86_ext_80387_constants & TUNEMASK))
    {
      REAL_VALUE_TYPE r;
      int i;
 
      if (! ext_80387_constants_init)
        init_ext_80387_constants ();
 
      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      for (i = 0; i < 5; i++)
        if (real_identical (&r, &ext_80387_constants_table[i]))
          return i + 3;
    }
 
  return 0;
}
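/* The return value encodes the load insn: -1 not a float CONST_DOUBLE,
   0 no special insn, 1 fldz, 2 fld1, and 3..7 index
   ext_80387_constants_table (fldlg2, fldln2, fldl2e, fldl2t, fldpi);
   standard_80387_constant_opcode maps the code back to a mnemonic.  */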
 
/* Return the opcode of the special instruction to be used to load
   the constant X.  */
 
const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
    {
    case 1:
      return "fldz";
    case 2:
      return "fld1";
    case 3:
      return "fldlg2";
    case 4:
      return "fldln2";
    case 5:
      return "fldl2e";
    case 6:
      return "fldl2t";
    case 7:
      return "fldpi";
    default:
      gcc_unreachable ();
    }
}
 
/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.  */
 
rtx
standard_80387_constant_rtx (int idx)
{
  int i;
 
  if (! ext_80387_constants_init)
    init_ext_80387_constants ();
 
  switch (idx)
    {
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
      i = idx - 3;
      break;
 
    default:
      gcc_unreachable ();
    }
 
  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
                                       XFmode);
}
 
/* Return 1 if X is an FP constant that we can load into an SSE register
   without using memory.  */
int
standard_sse_constant_p (rtx x)
{
  if (x == const0_rtx)
    return 1;
  return (x == CONST0_RTX (GET_MODE (x)));
}
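/* That is, only all-zero constants qualify; the move patterns can
   materialize those with a register-clearing xor rather than a load.  */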
 
/* Returns 1 if OP contains a symbol reference.  */
 
int
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;
 
  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;
 
  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          int j;
 
          for (j = XVECLEN (op, i) - 1; j >= 0; j--)
            if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
              return 1;
        }
 
      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
        return 1;
    }
 
  return 0;
}
 
/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.  */
 
int
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;
 
  if (! reload_completed || frame_pointer_needed)
    return 0;
 
  /* Don't allow more than 32k pop, since that's all we can do
     with one instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;
 
  ix86_compute_frame_layout (&frame);
  return frame.to_allocate == 0 && frame.nregs == 0;
}
 
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */
 
int
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return 1;
 
  /* Several x86 OSes need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return 1;
 
  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
     the frame pointer by default.  Turn it back on now if we've not
     got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!current_function_is_leaf))
    return 1;
 
  if (current_function_profile)
    return 1;
 
  return 0;
}
 
/* Record that the current function accesses previous call frames.  */
 
void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}
 
#if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif
 
static int pic_labels_used;
 
/* Fills in the label name that should be used for a pc thunk for
   the given register.  */
 
static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
 
 
/* This function generates, for each pc thunk that was used, code for
   -fpic that loads the thunk's target register with the return address
   of the caller and then returns.  */
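/* For example, with USE_HIDDEN_LINKONCE the thunk emitted for %ebx is:

       __i686.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret

   i.e. the caller's return address (the address of the instruction
   following the call) ends up in %ebx, ready for the GOT add emitted
   by output_set_got.  */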
 
void
ix86_file_end (void)
{
  rtx xops[2];
  int regno;
 
  for (regno = 0; regno < 8; ++regno)
    {
      char name[32];
 
      if (! ((pic_labels_used >> regno) & 1))
        continue;
 
      get_pc_thunk_name (name, regno);
 
      if (USE_HIDDEN_LINKONCE)
        {
          tree decl;
 
          decl = build_decl (FUNCTION_DECL, get_identifier (name),
                             error_mark_node);
          TREE_PUBLIC (decl) = 1;
          TREE_STATIC (decl) = 1;
          DECL_ONE_ONLY (decl) = 1;
 
          (*targetm.asm_out.unique_section) (decl, 0);
          named_section (decl, NULL, 0);
 
          (*targetm.asm_out.globalize_label) (asm_out_file, name);
          fputs ("\t.hidden\t", asm_out_file);
          assemble_name (asm_out_file, name);
          fputc ('\n', asm_out_file);
          ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
        }
      else
        {
          text_section ();
          ASM_OUTPUT_LABEL (asm_out_file, name);
        }
 
      xops[0] = gen_rtx_REG (SImode, regno);
      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
      output_asm_insn ("ret", xops);
    }
 
  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();
}
 
/* Emit code for the SET_GOT patterns.  */
 
const char *
output_set_got (rtx dest)
{
  rtx xops[3];
 
  xops[0] = dest;
  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
 
  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
    {
      xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
 
      if (!flag_pic)
        output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      else
        output_asm_insn ("call\t%a2", xops);
 
#if TARGET_MACHO
      /* Output the "canonical" label name ("Lxx$pb") here too.  This
         is what will be referred to by the Mach-O PIC subsystem.  */
      ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
#endif
      (*targetm.asm_out.internal_label) (asm_out_file, "L",
                                 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
 
      if (flag_pic)
        output_asm_insn ("pop{l}\t%0", xops);
    }
  else
    {
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);
 
      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);
    }
 
  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
  else if (!TARGET_MACHO)
    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
 
  return "";
}
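/* Thus, for DEST == %ebx, the flag_pic sequences emitted above are
   roughly:

     without deep branch prediction:      with it:
       call    .L1                          call    __i686.get_pc_thunk.bx
     .L1:                                   addl    $_GLOBAL_OFFSET_TABLE_, %ebx
       popl    %ebx
       addl    $_GLOBAL_OFFSET_TABLE_+[.-.L1], %ebx

   The thunk variant keeps call and return instructions paired, so it
   does not unbalance the CPU's return-address predictor stack.  */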
 
/* Generate a "push" pattern for input ARG.  */
 
static rtx
gen_push (rtx arg)
{
  return gen_rtx_SET (VOIDmode,
                      gen_rtx_MEM (Pmode,
                                   gen_rtx_PRE_DEC (Pmode,
                                                    stack_pointer_rtx)),
                      arg);
}
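/* The RTL built above is (set (mem (pre_dec sp)) arg), which the insn
   patterns match as a single `push' instruction.  */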
 
/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */
 
static unsigned int
ix86_select_alt_pic_regnum (void)
{
  if (current_function_is_leaf && !current_function_profile)
    {
      int i;
      for (i = 2; i >= 0; --i)
        if (!regs_ever_live[i])
          return i;
    }
 
  return INVALID_REGNUM;
}
 
/* Return 1 if we need to save REGNO.  */
static int
ix86_save_reg (unsigned int regno, int maybe_eh_return)
{
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
          || current_function_profile
          || current_function_calls_eh_return
          || current_function_uses_const_pool))
    {
      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
        return 0;
      return 1;
    }
 
  if (current_function_calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
        {
          unsigned test = EH_RETURN_DATA_REGNO (i);
          if (test == INVALID_REGNUM)
            break;
          if (test == regno)
            return 1;
        }
    }
 
  if (cfun->machine->force_align_arg_pointer
      && regno == REGNO (cfun->machine->force_align_arg_pointer))
    return 1;
 
  return (regs_ever_live[regno]
          && !call_used_regs[regno]
          && !fixed_regs[regno]
          && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
 
/* Return number of registers to be saved on the stack.  */
 
static int
ix86_nsaved_regs (void)
{
  int nregs = 0;
  int regno;
 
  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      nregs++;
  return nregs;
}
 
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */
 
HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);
 
  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
           && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      gcc_assert (to == STACK_POINTER_REGNUM);
 
      if (from == ARG_POINTER_REGNUM)
        return frame.stack_pointer_offset;
 
      gcc_assert (from == FRAME_POINTER_REGNUM);
      return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}
 
/* Fill the ix86_frame structure describing the frame of the currently
   compiled function.  */
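/* The layout computed below, from the incoming argument pointer down
   towards the stack pointer:

        saved return address
        saved frame pointer        (if frame_pointer_needed)
                <- hard_frame_pointer_offset
        saved registers            (nregs * UNITS_PER_WORD)
        va-arg save area           (va_arg_size)
        padding1                   (aligns the local frame)
                <- frame_pointer_offset
        local variables            (get_frame_size ())
        outgoing arguments         (if ACCUMULATE_OUTGOING_ARGS)
        padding2                   (aligns the stack boundary)
                <- stack_pointer_offset  */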
 
static void
ix86_compute_frame_layout (struct ix86_frame *frame)
{
  HOST_WIDE_INT total_size;
  unsigned int stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned int preferred_alignment;
  HOST_WIDE_INT size = get_frame_size ();
 
  frame->nregs = ix86_nsaved_regs ();
  total_size = size;
 
  stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
 
  /* During reload iteration the number of registers saved can change.
     Recompute the value as needed.  Do not recompute when the number of
     registers didn't change, as reload does multiple calls to the function
     and does not expect the decision to change within a single iteration.  */
  if (!optimize_size
      && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
    {
      int count = frame->nregs;
 
      cfun->machine->use_fast_prologue_epilogue_nregs = count;
      /* The fast prologue uses move instead of push to save registers.  This
         is significantly longer, but also executes faster as modern hardware
         can execute the moves in parallel, but can't do that for push/pop.
 
         Be careful about choosing what prologue to emit:  when the function
         takes many instructions to execute, we may use the slow version;
         likewise when the function is known to be outside a hot spot (this
         is known with profile feedback only).  Weight the size of the
         function by the number of registers to save, as it is cheap to use
         one or two push instructions but very slow to use many of them.  */
      if (count)
        count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
          || (flag_branch_probabilities
              && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
        cfun->machine->use_fast_prologue_epilogue = false;
      else
        cfun->machine->use_fast_prologue_epilogue
           = !expensive_function_p (count);
    }
  if (TARGET_PROLOGUE_USING_MOVE
      && cfun->machine->use_fast_prologue_epilogue)
    frame->save_regs_using_mov = true;
  else
    frame->save_regs_using_mov = false;
 
 
  /* Skip return address and saved base pointer.  */
  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
 
  frame->hard_frame_pointer_offset = offset;
 
  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since the i386 port is the only one using those
     features, and they may break easily.  */
 
  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (stack_alignment_needed
              <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
 
  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
 
  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;
 
  /* Va-arg area */
  if (ix86_save_varrargs_registers)
    {
      offset += X86_64_VARARGS_SIZE;
      frame->va_arg_size = X86_64_VARARGS_SIZE;
    }
  else
    frame->va_arg_size = 0;
 
  /* Align start of frame for local function.  */
  frame->padding1 = ((offset + stack_alignment_needed - 1)
                     & -stack_alignment_needed) - offset;
 
  offset += frame->padding1;
 
  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;
 
  offset += size;
 
  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when the function calls alloca.  The
     alloca expander assumes that the last current_function_outgoing_args_size
     bytes of the stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!current_function_is_leaf || current_function_calls_alloca))
    {
      offset += current_function_outgoing_args_size;
      frame->outgoing_arguments_size = current_function_outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;
 
  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!current_function_is_leaf || current_function_calls_alloca)
    frame->padding2 = ((offset + preferred_alignment - 1)
                       & -preferred_alignment) - offset;
  else
    frame->padding2 = 0;
 
  offset += frame->padding2;
 
  /* We've reached the end of the stack frame.  */
  frame->stack_pointer_offset = offset;
 
  /* Size the prologue needs to allocate.  */
  frame->to_allocate =
    (size + frame->padding1 + frame->padding2
     + frame->outgoing_arguments_size + frame->va_arg_size);
 
  if ((!frame->to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
    frame->save_regs_using_mov = false;
 
  if (TARGET_RED_ZONE && current_function_sp_is_unchanging
      && current_function_is_leaf)
    {
      frame->red_zone_size = frame->to_allocate;
      if (frame->save_regs_using_mov)
        frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
        frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->to_allocate -= frame->red_zone_size;
  frame->stack_pointer_offset -= frame->red_zone_size;
#if 0
  fprintf (stderr, "nregs: %i\n", frame->nregs);
  fprintf (stderr, "size: %i\n", size);
  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
  fprintf (stderr, "padding1: %i\n", frame->padding1);
  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
  fprintf (stderr, "padding2: %i\n", frame->padding2);
  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
           frame->hard_frame_pointer_offset);
  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
#endif
}
 
/* Emit code to save registers in the prologue.  */
 
static void
ix86_emit_save_regs (void)
{
  unsigned int regno;
  rtx insn;
 
  for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
    if (ix86_save_reg (regno, true))
      {
        insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
        RTX_FRAME_RELATED_P (insn) = 1;
      }
}
 
/* Emit code to save registers using MOV insns.  The first register
   is saved at POINTER + OFFSET.  */
static void
ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
{
  unsigned int regno;
  rtx insn;
 
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, true))
      {
        insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
                                               Pmode, offset),
                               gen_rtx_REG (Pmode, regno));
        RTX_FRAME_RELATED_P (insn) = 1;
        offset += UNITS_PER_WORD;
      }
}
 
/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame
   related, zero if the %r11 register is live and cannot be freely used,
   and positive otherwise.  */
 
static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
{
  rtx insn;
 
  if (! TARGET_64BIT)
    insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
  else if (x86_64_immediate_operand (offset, DImode))
    insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
  else
    {
      rtx r11;
      /* r11 is used by indirect sibcall return as well, set before the
         epilogue and used after the epilogue.  ATM indirect sibcall
         shouldn't be used together with huge frame sizes in one
         function because of the frame_size check in sibcall.c.  */
      gcc_assert (style);
      r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
      insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
      if (style < 0)
        RTX_FRAME_RELATED_P (insn) = 1;
      insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
                                                               offset));
    }
  if (style < 0)
    RTX_FRAME_RELATED_P (insn) = 1;
}
 
/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */
 
static rtx
ix86_internal_arg_pointer (void)
{
  if (FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
      && DECL_NAME (current_function_decl)
      && MAIN_NAME_P (DECL_NAME (current_function_decl))
      && DECL_FILE_SCOPE_P (current_function_decl))
    {
      cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
      return copy_to_reg (cfun->machine->force_align_arg_pointer);
    }
  else
    return virtual_incoming_args_rtx;
}
 
/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
   This is called from dwarf2out.c to emit call frame instructions
   for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
static void
ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
{
  rtx unspec = SET_SRC (pattern);
  gcc_assert (GET_CODE (unspec) == UNSPEC);
 
  switch (index)
    {
    case UNSPEC_REG_SAVE:
      dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
                              SET_DEST (pattern));
      break;
    case UNSPEC_DEF_CFA:
      dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
                         INTVAL (XVECEXP (unspec, 0, 0)));
      break;
    default:
      gcc_unreachable ();
    }
}
 
/* Expand the prologue into a bunch of separate insns.  */
 
void
ix86_expand_prologue (void)
{
  rtx insn;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;
 
  ix86_compute_frame_layout (&frame);
 
  if (cfun->machine->force_align_arg_pointer)
    {
      rtx x, y;
 
      /* Grab the argument pointer.  */
      x = plus_constant (stack_pointer_rtx, 4);
      y = cfun->machine->force_align_arg_pointer;
      insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
      RTX_FRAME_RELATED_P (insn) = 1;
 
      /* The unwind info consists of two parts: install the fafp as the cfa,
         and record the fafp as the "save register" of the stack pointer.
         The latter is there in order that the unwinder can see where it
         should restore the stack pointer across the `and' insn.  */
      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
      x = gen_rtx_SET (VOIDmode, y, x);
      RTX_FRAME_RELATED_P (x) = 1;
      y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
                          UNSPEC_REG_SAVE);
      y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
      RTX_FRAME_RELATED_P (y) = 1;
      x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
      REG_NOTES (insn) = x;
 
      /* Align the stack.  */
      emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
                             GEN_INT (-16)));
 
      /* And here we cheat like madmen with the unwind info.  We force the
         cfa register back to sp+4, which is exactly what it was at the
         start of the function.  Re-pushing the return address results in
         the return at the same spot relative to the cfa, and thus is
         correct wrt the unwind info.  */
      x = cfun->machine->force_align_arg_pointer;
      x = gen_frame_mem (Pmode, plus_constant (x, -4));
      insn = emit_insn (gen_push (x));
      RTX_FRAME_RELATED_P (insn) = 1;
 
      x = GEN_INT (4);
      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
      REG_NOTES (insn) = x;
    }
 
  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */
 
  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;
 
      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
 
  allocate = frame.to_allocate;
 
  if (!frame.save_regs_using_mov)
    ix86_emit_save_regs ();
  else
    allocate += frame.nregs * UNITS_PER_WORD;
 
  /* When using the red zone we may start register saving before allocating
     the stack frame, saving one cycle of the prologue.  */
  if (TARGET_RED_ZONE && frame.save_regs_using_mov)
    ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
                                   : stack_pointer_rtx,
                                   -frame.nregs * UNITS_PER_WORD);
 
  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                               GEN_INT (-allocate), -1);
  else
    {
      /* Only valid for Win32.  */
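      /* With TARGET_STACK_PROBE, large allocations go through the
         allocate_stack_worker pattern: %eax carries the byte count in,
         and the probe helper it expands to adjusts %esp itself, hence
         the explicit REG_FRAME_RELATED_EXPR note attached below.  */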
      rtx eax = gen_rtx_REG (SImode, 0);
      bool eax_live = ix86_eax_live_at_start_p ();
      rtx t;
 
      gcc_assert (!TARGET_64BIT);
 
      if (eax_live)
        {
          emit_insn (gen_push (eax));
          allocate -= 4;
        }
 
      emit_move_insn (eax, GEN_INT (allocate));
 
      insn = emit_insn (gen_allocate_stack_worker (eax));
      RTX_FRAME_RELATED_P (insn) = 1;
      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
                                            t, REG_NOTES (insn));
 
      if (eax_live)
        {
          if (frame_pointer_needed)
            t = plus_constant (hard_frame_pointer_rtx,
                               allocate
                               - frame.to_allocate
                               - frame.nregs * UNITS_PER_WORD);
          else
            t = plus_constant (stack_pointer_rtx, allocate);
          emit_move_insn (eax, gen_rtx_MEM (SImode, t));
        }
    }
 
  if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
    {
      if (!frame_pointer_needed || !frame.to_allocate)
        ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
        ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
                                       -frame.nregs * UNITS_PER_WORD);
    }
 
  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
          || current_function_profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
 
      if (alt_pic_reg_used != INVALID_REGNUM)
        REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
 
      pic_reg_used = true;
    }
 
  if (pic_reg_used)
    {
      if (TARGET_64BIT)
        insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
      else
        insn = emit_insn (gen_set_got (pic_offset_table_rtx));
 
      /* Even with accurate pre-reload life analysis, we can wind up
         deleting all references to the pic register after reload.
         Consider if cross-jumping unifies two sides of a branch
         controlled by a comparison vs the only read from a global.
         In which case, allow the set_got to be deleted, though we're
         too late to do anything about the ebx save in the prologue.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
    }
 
  /* Prevent function calls from being scheduled before the call to mcount.
     In the pic_reg_used case, make sure that the got load isn't deleted.  */
  if (current_function_profile)
    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
}
 
/* Emit code to restore saved registers using MOV insns.  The first
   register is restored from POINTER + OFFSET.  */
static void
ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
                                  int maybe_eh_return)
{
  int regno;
  rtx base_address = gen_rtx_MEM (Pmode, pointer);
 
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, maybe_eh_return))
      {
        /* Ensure that adjust_address won't be forced to produce a pointer
           out of the range allowed by the x86-64 instruction set.  */
        if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
          {
            rtx r11;
 
            r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
            emit_move_insn (r11, GEN_INT (offset));
            emit_insn (gen_adddi3 (r11, r11, pointer));
            base_address = gen_rtx_MEM (Pmode, r11);
            offset = 0;
          }
        emit_move_insn (gen_rtx_REG (Pmode, regno),
                        adjust_address (base_address, Pmode, offset));
        offset += UNITS_PER_WORD;
      }
}
 
/* Restore function stack, frame, and registers.  */
 
void
ix86_expand_epilogue (int style)
{
  int regno;
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;
 
  ix86_compute_frame_layout (&frame);
 
  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
    offset -= 2;
  offset *= -UNITS_PER_WORD;
 
  /* If we're only restoring one register and sp is not valid, then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.
 
     The default code results in a stack adjustment using an add/lea
     instruction, while this code results in a LEAVE instruction (or
     discrete equivalent), so it is profitable in some other cases as
     well.  Especially when there are no registers to restore.  We also
     use this code when TARGET_USE_LEAVE and there is exactly one
     register to pop.  This heuristic may need some tuning in the
     future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
          && cfun->machine->use_fast_prologue_epilogue
          && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
          && cfun->machine->use_fast_prologue_epilogue
          && frame.nregs == 1)
      || current_function_calls_eh_return)
    {
      /* Restore registers.  We can use ebp or esp to address the memory
         locations.  If both are available, default to ebp, since offsets
         are known to be small.  The only exception is esp pointing
         directly to the end of the block of saved registers, where we
         may simplify the addressing mode.  */
 
      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
        ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
                                          frame.to_allocate, style == 2);
      else
        ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
                                          offset, style == 2);
 
      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
        {
          rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
 
          if (frame_pointer_needed)
            {
              tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
              tmp = plus_constant (tmp, UNITS_PER_WORD);
              emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
 
              tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
              emit_move_insn (hard_frame_pointer_rtx, tmp);
 
              pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
                                         const0_rtx, style);
            }
          else
            {
              tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
              tmp = plus_constant (tmp, (frame.to_allocate
                                         + frame.nregs * UNITS_PER_WORD));
              emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
            }
        }
      else if (!frame_pointer_needed)
        pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                   GEN_INT (frame.to_allocate
                                            + frame.nregs * UNITS_PER_WORD),
                                   style);
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size
               || !cfun->machine->use_fast_prologue_epilogue)
        emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
      else
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx,
                                     hard_frame_pointer_rtx,
                                     const0_rtx, style);
          if (TARGET_64BIT)
            emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
          else
            emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
        }
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
         pop the registers.  */
      if (!sp_valid)
        {
          gcc_assert (frame_pointer_needed);
          pro_epilogue_adjust_stack (stack_pointer_rtx,
                                     hard_frame_pointer_rtx,
                                     GEN_INT (offset), style);
        }
      else if (frame.to_allocate)
        pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                   GEN_INT (frame.to_allocate), style);
 
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
        if (ix86_save_reg (regno, false))
          {
            if (TARGET_64BIT)
              emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
            else
              emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
          }
      if (frame_pointer_needed)
        {
          /* Leave results in shorter dependency chains on CPUs that are
             able to grok it fast.  */
          if (TARGET_USE_LEAVE)
            emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
          else if (TARGET_64BIT)
            emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
          else
            emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
        }
    }
 
  if (cfun->machine->force_align_arg_pointer)
    {
      emit_insn (gen_addsi3 (stack_pointer_rtx,
                             cfun->machine->force_align_arg_pointer,
                             GEN_INT (-4)));
    }
 
  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    return;
 
  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);
 
      /* i386 can only pop 64K bytes.  If asked to pop more, pop the
         return address, do an explicit add, and jump indirectly to the
         caller.  */
 
      if (current_function_pops_args >= 65536)
        {
          rtx ecx = gen_rtx_REG (SImode, 2);
 
          /* There is no "pascal" calling convention in the 64-bit ABI.  */
          gcc_assert (!TARGET_64BIT);
 
          emit_insn (gen_popsi1 (ecx));
          emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
          emit_jump_insn (gen_return_indirect_internal (ecx));
        }
      else
        emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
 
/* Reset the PIC register from the function's potential modifications.  */
 
static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
                               HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  if (pic_offset_table_rtx)
    REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
}
 
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but is still used for computing the length of the lea
   instruction.  */
 
int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;
 
  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx addends[4], op;
      int n = 0, i;
 
      op = addr;
      do
        {
          if (n >= 4)
            return 0;
          addends[n++] = XEXP (op, 1);
          op = XEXP (op, 0);
        }
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
        return 0;
      addends[n] = op;
 
      for (i = n; i >= 0; --i)
        {
          op = addends[i];
          switch (GET_CODE (op))
            {
            case MULT:
              if (index)
                return 0;
              index = XEXP (op, 0);
              scale_rtx = XEXP (op, 1);
              break;
 
            case UNSPEC:
              if (XINT (op, 1) == UNSPEC_TP
                  && TARGET_TLS_DIRECT_SEG_REFS
                  && seg == SEG_DEFAULT)
                seg = TARGET_64BIT ? SEG_FS : SEG_GS;
              else
                return 0;
              break;
 
            case REG:
            case SUBREG:
              if (!base)
                base = op;
              else if (!index)
                index = op;
              else
                return 0;
              break;
 
            case CONST:
            case CONST_INT:
            case SYMBOL_REF:
            case LABEL_REF:
              if (disp)
                return 0;
              disp = op;
              break;
 
            default:
              return 0;
            }
        }
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);            /* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;
 
      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
        return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
        return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;                        /* displacement */
 
  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
        return 0;
      scale = INTVAL (scale_rtx);
    }
 
  base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
  index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
 
  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
  if (base_reg && index_reg && scale == 1
      && (index_reg == arg_pointer_rtx
          || index_reg == frame_pointer_rtx
          || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
    {
      rtx tmp;
      tmp = base, base = index, index = tmp;
      tmp = base_reg, base_reg = index_reg, index_reg = tmp;
    }
 
  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base_reg == hard_frame_pointer_rtx
       || base_reg == frame_pointer_rtx
       || base_reg == arg_pointer_rtx) && !disp)
    disp = const0_rtx;
 
  /* Special case: on K6, [%esi] causes the instruction to be vector
     decoded.  Avoid this by transforming to [%esi+0].  */
  if (ix86_tune == PROCESSOR_K6 && !optimize_size
      && base_reg && !index_reg && !disp
      && REG_P (base_reg)
      && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
    disp = const0_rtx;
 
  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, base_reg = index_reg, scale = 1;
 
  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;
 
  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;
 
  return retval;
}
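/* For example, the address 12(%ebx,%esi,4), i.e. the RTL
   (plus (plus (mult (reg %esi) (const_int 4)) (reg %ebx)) (const_int 12)),
   decomposes into base = %ebx, index = %esi, scale = 4, disp = 12.  */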
5393
 
5394
/* Return cost of the memory address x.
5395
   For i386, it is better to use a complex address than let gcc copy
5396
   the address into a reg and make a new pseudo.  But not if the address
5397
   requires to two regs - that would mean more pseudos with longer
5398
   lifetimes.  */
5399
static int
5400
ix86_address_cost (rtx x)
5401
{
5402
  struct ix86_address parts;
5403
  int cost = 1;
5404
  int ok = ix86_decompose_address (x, &parts);
5405
 
5406
  gcc_assert (ok);
5407
 
5408
  if (parts.base && GET_CODE (parts.base) == SUBREG)
5409
    parts.base = SUBREG_REG (parts.base);
5410
  if (parts.index && GET_CODE (parts.index) == SUBREG)
5411
    parts.index = SUBREG_REG (parts.index);
5412
 
5413
  /* More complex memory references are better.  */
5414
  if (parts.disp && parts.disp != const0_rtx)
5415
    cost--;
5416
  if (parts.seg != SEG_DEFAULT)
5417
    cost--;
5418
 
5419
  /* Attempt to minimize number of registers in the address.  */
5420
  if ((parts.base
5421
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5422
      || (parts.index
5423
          && (!REG_P (parts.index)
5424
              || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5425
    cost++;
5426
 
5427
  if (parts.base
5428
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5429
      && parts.index
5430
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5431
      && parts.base != parts.index)
5432
    cost++;
5433
 
5434
  /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5435
     since it's predecode logic can't detect the length of instructions
5436
     and it degenerates to vector decoded.  Increase cost of such
5437
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
5438
     to split such addresses or even refuse such addresses at all.
5439
 
5440
     Following addressing modes are affected:
5441
      [base+scale*index]
5442
      [scale*index+disp]
5443
      [base+index]
5444
 
5445
     The first and last case  may be avoidable by explicitly coding the zero in
5446
     memory address, but I don't have AMD-K6 machine handy to check this
5447
     theory.  */
5448
 
5449
  if (TARGET_K6
5450
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5451
          || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5452
          || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5453
    cost += 10;
5454
 
5455
  return cost;
5456
}
5457
 
5458
/* If X is a machine specific address (i.e. a symbol or label being
5459
   referenced as a displacement from the GOT implemented using an
5460
   UNSPEC), then return the base term.  Otherwise return X.  */
5461
 
5462
rtx
5463
ix86_find_base_term (rtx x)
5464
{
5465
  rtx term;
5466
 
5467
  if (TARGET_64BIT)
5468
    {
5469
      if (GET_CODE (x) != CONST)
5470
        return x;
5471
      term = XEXP (x, 0);
5472
      if (GET_CODE (term) == PLUS
5473
          && (GET_CODE (XEXP (term, 1)) == CONST_INT
5474
              || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5475
        term = XEXP (term, 0);
5476
      if (GET_CODE (term) != UNSPEC
5477
          || XINT (term, 1) != UNSPEC_GOTPCREL)
5478
        return x;
5479
 
5480
      term = XVECEXP (term, 0, 0);
5481
 
5482
      if (GET_CODE (term) != SYMBOL_REF
5483
          && GET_CODE (term) != LABEL_REF)
5484
        return x;
5485
 
5486
      return term;
5487
    }
5488
 
5489
  term = ix86_delegitimize_address (x);
5490
 
5491
  if (GET_CODE (term) != SYMBOL_REF
5492
      && GET_CODE (term) != LABEL_REF)
5493
    return x;
5494
 
5495
  return term;
5496
}
5497
 
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
   this is used to form addresses to local data when -fPIC is in
   use.  */

static bool
darwin_local_data_pic (rtx disp)
{
  if (GET_CODE (disp) == MINUS)
    {
      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
          || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
        if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
          {
            const char *sym_name = XSTR (XEXP (disp, 1), 0);
            if (! strcmp (sym_name, "<pic base>"))
              return true;
          }
    }

  return false;
}
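
/* Illustration only: a sketch of the RTX shape the predicate above
   accepts -- a local symbol minus the special Mach-O "<pic base>"
   symbol.  The symbol name "_local_data" is made up for the example.  */
#if 0
static bool
example_darwin_disp (void)
{
  rtx lab = gen_rtx_SYMBOL_REF (Pmode, "_local_data");
  rtx pic_base = gen_rtx_SYMBOL_REF (Pmode, "<pic base>");
  return darwin_local_data_pic (gen_rtx_MINUS (Pmode, lab, pic_base));
}
#endif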
 
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

bool
legitimate_constant_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      x = XEXP (x, 0);

      if (GET_CODE (x) == PLUS)
        {
          if (GET_CODE (XEXP (x, 1)) != CONST_INT)
            return false;
          x = XEXP (x, 0);
        }

      if (TARGET_MACHO && darwin_local_data_pic (x))
        return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
        switch (XINT (x, 1))
          {
          case UNSPEC_GOTOFF:
            return TARGET_64BIT;
          case UNSPEC_TPOFF:
          case UNSPEC_NTPOFF:
            x = XVECEXP (x, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
          case UNSPEC_DTPOFF:
            x = XVECEXP (x, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
          default:
            return false;
          }

      /* We must have drilled down to a symbol.  */
      if (GET_CODE (x) == LABEL_REF)
        return true;
      if (GET_CODE (x) != SYMBOL_REF)
        return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))
        return false;
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == TImode
          && x != CONST0_RTX (TImode)
          && !TARGET_64BIT)
        return false;
      break;

    case CONST_VECTOR:
      if (x == CONST0_RTX (GET_MODE (x)))
        return true;
      return false;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}
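
/* Illustration only: a sketch of typical verdicts from the predicate
   above.  A plain integer and a non-TLS symbol are valid; a symbol with
   a TLS model set would be rejected by the SYMBOL_REF case.  */
#if 0
static void
example_legitimate_constants (void)
{
  rtx num = GEN_INT (42);                       /* always valid */
  rtx sym = gen_rtx_SYMBOL_REF (Pmode, "foo");  /* valid: no TLS model */
  gcc_assert (legitimate_constant_p (num));
  gcc_assert (legitimate_constant_p (sym));
}
#endif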
 
/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (rtx x)
{
  /* We can always put integral constants and vectors in memory.  */
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_VECTOR:
      return false;

    default:
      break;
    }
  return !legitimate_constant_p (x);
}
 
/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
}
 
/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
          && GET_CODE (XEXP (inner, 1)) == CONST_INT)
        inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
        switch (XINT (inner, 1))
          {
          case UNSPEC_GOTOFF:
            return TARGET_64BIT;
          case UNSPEC_TPOFF:
            x = XVECEXP (inner, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
          default:
            return false;
          }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}
 
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

int
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64-bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx op0 = disp, op1;

      switch (GET_CODE (disp))
        {
        case LABEL_REF:
          return true;

        case CONST:
          if (GET_CODE (XEXP (disp, 0)) != PLUS)
            break;
          op0 = XEXP (XEXP (disp, 0), 0);
          op1 = XEXP (XEXP (disp, 0), 1);
          if (GET_CODE (op1) != CONST_INT
              || INTVAL (op1) >= 16*1024*1024
              || INTVAL (op1) < -16*1024*1024)
            break;
          if (GET_CODE (op0) == LABEL_REF)
            return true;
          if (GET_CODE (op0) != SYMBOL_REF)
            break;
          /* FALLTHRU */

        case SYMBOL_REF:
          /* TLS references should always be enclosed in UNSPEC.  */
          if (SYMBOL_REF_TLS_MODEL (op0))
            return false;
          if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
            return true;
          break;

        default:
          break;
        }
    }
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions.  This limits the allowed
         distance of GOT table references.  We should not need these
         anyway.  */
      if (GET_CODE (disp) != UNSPEC
          || (XINT (disp, 1) != UNSPEC_GOTPCREL
              && XINT (disp, 1) != UNSPEC_GOTOFF))
        return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
          && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
        return 0;
      return 1;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
        return 0;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return 1;

  if (GET_CODE (disp) != UNSPEC)
    return 0;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
        return false;
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64-bit mode since it is always 64 bits when used.
         While the ABI also specifies a 32-bit relocation, we don't produce
         it in the small PIC model at all.  */
      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
           || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
          && !TARGET_64BIT)
        return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
        return false;
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
    case UNSPEC_NTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
    case UNSPEC_DTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
    }

  return 0;
}
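
/* Illustration only: a sketch of a displacement the predicate above
   accepts in 32-bit PIC mode -- a symbol wrapped in UNSPEC_GOTOFF inside
   a CONST.  Acceptance additionally requires the symbol to satisfy
   local_symbolic_operand.  */
#if 0
static int
example_gotoff_disp (rtx local_sym)
{
  rtx disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, local_sym),
                             UNSPEC_GOTOFF);
  return legitimate_pic_address_disp_p (gen_rtx_CONST (Pmode, disp));
}
#endif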
 
/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

int
legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
               "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
               GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (ix86_decompose_address (addr, &parts) <= 0)
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREGs that span more than a word here.  They can lead to
     spill failures when the base is one word out of a two word structure,
     which is represented internally as a DImode int.  */

  if (base)
    {
      rtx reg;
      reason_rtx = base;

      if (REG_P (base))
        reg = base;
      else if (GET_CODE (base) == SUBREG
               && REG_P (SUBREG_REG (base))
               && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
                  <= UNITS_PER_WORD)
        reg = SUBREG_REG (base);
      else
        {
          reason = "base is not a register";
          goto report_error;
        }

      if (GET_MODE (base) != Pmode)
        {
          reason = "base is not in Pmode";
          goto report_error;
        }

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
          || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
        {
          reason = "base is not valid";
          goto report_error;
        }
    }

  /* Validate index register.

     Don't allow SUBREGs that span more than a word here -- same as above.  */

  if (index)
    {
      rtx reg;
      reason_rtx = index;

      if (REG_P (index))
        reg = index;
      else if (GET_CODE (index) == SUBREG
               && REG_P (SUBREG_REG (index))
               && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
                  <= UNITS_PER_WORD)
        reg = SUBREG_REG (index);
      else
        {
          reason = "index is not a register";
          goto report_error;
        }

      if (GET_MODE (index) != Pmode)
        {
          reason = "index is not in Pmode";
          goto report_error;
        }

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
          || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
        {
          reason = "index is not valid";
          goto report_error;
        }
    }

  /* Validate scale factor.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
        {
          reason = "scale without index";
          goto report_error;
        }

      if (scale != 2 && scale != 4 && scale != 8)
        {
          reason = "scale is not a valid multiplier";
          goto report_error;
        }
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (GET_CODE (disp) == CONST
          && GET_CODE (XEXP (disp, 0)) == UNSPEC)
        switch (XINT (XEXP (disp, 0), 1))
          {
          /* Refuse GOTOFF and GOT in 64-bit mode since they are always
             64 bits when used.  While the ABI also specifies 32-bit
             relocations, we don't produce them at all and use IP-relative
             addressing instead.  */
          case UNSPEC_GOT:
          case UNSPEC_GOTOFF:
            gcc_assert (flag_pic);
            if (!TARGET_64BIT)
              goto is_legitimate_pic;
            reason = "64bit address unspec";
            goto report_error;

          case UNSPEC_GOTPCREL:
            gcc_assert (flag_pic);
            goto is_legitimate_pic;

          case UNSPEC_GOTTPOFF:
          case UNSPEC_GOTNTPOFF:
          case UNSPEC_INDNTPOFF:
          case UNSPEC_NTPOFF:
          case UNSPEC_DTPOFF:
            break;

          default:
            reason = "invalid address unspec";
            goto report_error;
          }

      else if (flag_pic && (SYMBOLIC_CONST (disp)
#if TARGET_MACHO
                            && !machopic_operand_p (disp)
#endif
                            ))
        {
        is_legitimate_pic:
          if (TARGET_64BIT && (index || base))
            {
              /* foo@dtpoff(%rX) is ok.  */
              if (GET_CODE (disp) != CONST
                  || GET_CODE (XEXP (disp, 0)) != PLUS
                  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
                  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
                  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
                      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
                {
                  reason = "non-constant pic memory reference";
                  goto report_error;
                }
            }
          else if (! legitimate_pic_address_disp_p (disp))
            {
              reason = "displacement is an invalid pic construct";
              goto report_error;
            }

          /* This code used to verify that a symbolic pic displacement
             includes the pic_offset_table_rtx register.

             While this is a good idea, unfortunately these constructs may
             be created by the "adds using lea" optimization for incorrect
             code like:

             int a;
             int foo(int i)
               {
                 return *(&a+i);
               }

             This code is nonsensical, but results in addressing the
             GOT table with pic_offset_table_rtx as the base.  We can't
             just refuse it easily, since it gets matched by the
             "addsi3" pattern, which later gets split to lea in the
             case the output register differs from the input.  While this
             could be handled by a separate addsi pattern for this case
             that never results in lea, disabling this test seems to be
             the easier and correct fix for the crash.  */
        }
      else if (GET_CODE (disp) != LABEL_REF
               && GET_CODE (disp) != CONST_INT
               && (GET_CODE (disp) != CONST
                   || !legitimate_constant_p (disp))
               && (GET_CODE (disp) != SYMBOL_REF
                   || !legitimate_constant_p (disp)))
        {
          reason = "displacement is not constant";
          goto report_error;
        }
      else if (TARGET_64BIT
               && !x86_64_immediate_operand (disp, VOIDmode))
        {
          reason = "displacement is out of range";
          goto report_error;
        }
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

 report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
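
/* Illustration only: a sketch of querying the validator above with a
   hand-built scaled-index address, [reg0 + reg1*4 + 8].  Non-strict
   checking (third argument 0) also accepts pseudo registers.  */
#if 0
static int
example_legitimate_address (void)
{
  rtx addr = gen_rtx_PLUS (Pmode,
                           gen_rtx_PLUS (Pmode,
                                         gen_rtx_MULT (Pmode,
                                                       gen_rtx_REG (Pmode, 1),
                                                       GEN_INT (4)),
                                         gen_rtx_REG (Pmode, 0)),
                           GEN_INT (8));
  return legitimate_address_p (SImode, addr, 0);
}
#endif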
 
/* Return a unique alias set for the GOT.  */

static HOST_WIDE_INT
ix86_GOT_alias_set (void)
{
  static HOST_WIDE_INT set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}
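
/* The function above uses the usual lazy-initialization idiom: allocate
   the handle on first use, cache it in a function-local static, and hand
   back the cached value afterwards.  The same pattern in isolation
   (illustration only; allocate_new_id is a hypothetical allocator):  */
#if 0
static int
get_unique_id (void)
{
  static int id = -1;           /* sentinel: not yet allocated */
  if (id == -1)
    id = allocate_new_id ();    /* runs exactly once */
  return id;
}
#endif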
 
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

static rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

#if TARGET_MACHO
  if (reg == 0)
    reg = gen_reg_rtx (Pmode);
  /* Use the generic Mach-O PIC machinery.  */
  return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
#endif

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new = addr;
  else if (TARGET_64BIT
           && ix86_cmodel != CM_SMALL_PIC
           && local_symbolic_operand (addr, Pmode))
    {
      rtx tmpreg;
      /* This symbol may be referenced via a displacement from the PIC
         base address (@GOTOFF).  */

      if (reload_in_progress)
        regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
      if (GET_CODE (addr) == CONST)
        addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
        {
          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
                                UNSPEC_GOTOFF);
          new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
        }
      else
        new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new = gen_rtx_CONST (Pmode, new);
      if (!reg)
        tmpreg = gen_reg_rtx (Pmode);
      else
        tmpreg = reg;
      emit_move_insn (tmpreg, new);

      if (reg != 0)
        {
          new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
                                     tmpreg, 1, OPTAB_DIRECT);
          new = reg;
        }
      else
        new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
    }
  else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
         base address (@GOTOFF).  */

      if (reload_in_progress)
        regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
      if (GET_CODE (addr) == CONST)
        addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
        {
          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
                                UNSPEC_GOTOFF);
          new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
        }
      else
        new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
        {
          emit_move_insn (reg, new);
          new = reg;
        }
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      if (TARGET_64BIT)
        {
          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
          new = gen_rtx_CONST (Pmode, new);
          new = gen_const_mem (Pmode, new);
          set_mem_alias_set (new, ix86_GOT_alias_set ());

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          /* Use gen_movsi directly; otherwise the address is loaded
             into a register for CSE.  We don't want to CSE these
             addresses; instead we CSE addresses loaded from the GOT
             table, so skip this.  */
          emit_insn (gen_movsi (reg, new));
          new = reg;
        }
      else
        {
          /* This symbol must be referenced via a load from the
             Global Offset Table (@GOT).  */

          if (reload_in_progress)
            regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
          new = gen_rtx_CONST (Pmode, new);
          new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
          new = gen_const_mem (Pmode, new);
          set_mem_alias_set (new, ix86_GOT_alias_set ());

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          emit_move_insn (reg, new);
          new = reg;
        }
    }
  else
    {
      if (GET_CODE (addr) == CONST_INT
          && !x86_64_immediate_operand (addr, VOIDmode))
        {
          if (reg)
            {
              emit_move_insn (reg, addr);
              new = reg;
            }
          else
            new = force_reg (Pmode, addr);
        }
      else if (GET_CODE (addr) == CONST)
        {
          addr = XEXP (addr, 0);

          /* We must match stuff we generate before.  Assume the only
             unspecs that can get here are ours.  Not that we could do
             anything with them anyway....  */
          if (GET_CODE (addr) == UNSPEC
              || (GET_CODE (addr) == PLUS
                  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
            return orig;
          gcc_assert (GET_CODE (addr) == PLUS);
        }
      if (GET_CODE (addr) == PLUS)
        {
          rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

          /* Check first to see if this is a constant offset from a @GOTOFF
             symbol reference.  */
          if (local_symbolic_operand (op0, Pmode)
              && GET_CODE (op1) == CONST_INT)
            {
              if (!TARGET_64BIT)
                {
                  if (reload_in_progress)
                    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
                  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
                                        UNSPEC_GOTOFF);
                  new = gen_rtx_PLUS (Pmode, new, op1);
                  new = gen_rtx_CONST (Pmode, new);
                  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

                  if (reg != 0)
                    {
                      emit_move_insn (reg, new);
                      new = reg;
                    }
                }
              else
                {
                  if (INTVAL (op1) < -16*1024*1024
                      || INTVAL (op1) >= 16*1024*1024)
                    {
                      if (!x86_64_immediate_operand (op1, Pmode))
                        op1 = force_reg (Pmode, op1);
                      new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
                    }
                }
            }
          else
            {
              base = legitimize_pic_address (XEXP (addr, 0), reg);
              new  = legitimize_pic_address (XEXP (addr, 1),
                                             base == reg ? NULL_RTX : reg);

              if (GET_CODE (new) == CONST_INT)
                new = plus_constant (base, INTVAL (new));
              else
                {
                  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
                    {
                      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
                      new = XEXP (new, 1);
                    }
                  new = gen_rtx_PLUS (Pmode, base, new);
                }
            }
        }
    }
  return new;
}
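
/* Illustration only: a sketch of the 32-bit transformation performed
   above.  A global symbol becomes a load from the GOT,

     (symbol_ref "foo")
       => (mem (plus pic_offset_table_rtx
                     (const (unspec [foo] UNSPEC_GOT))))

   while a local symbol stays a direct @GOTOFF displacement from the PIC
   base and needs no memory load.  */
#if 0
static rtx
example_pic_symbol (void)
{
  return legitimize_pic_address (gen_rtx_SYMBOL_REF (Pmode, "foo"),
                                 NULL_RTX);
}
#endif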
 
/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

static rtx
get_thread_pointer (int to_reg)
{
  rtx tp, reg, insn;

  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
  if (!to_reg)
    return tp;

  reg = gen_reg_rtx (Pmode);
  insn = gen_rtx_SET (VOIDmode, reg, tp);
  insn = emit_insn (insn);

  return reg;
}
 
/* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.  */

static rtx
legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
{
  rtx dest, base, off, pic;
  int type;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      dest = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
        {
          rtx rax = gen_rtx_REG (Pmode, 0), insns;

          start_sequence ();
          emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
          insns = get_insns ();
          end_sequence ();

          emit_libcall_block (insns, dest, rax, x);
        }
      else
        emit_insn (gen_tls_global_dynamic_32 (dest, x));
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      base = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
        {
          rtx rax = gen_rtx_REG (Pmode, 0), insns, note;

          start_sequence ();
          emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
          insns = get_insns ();
          end_sequence ();

          note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
          note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
          emit_libcall_block (insns, base, rax, note);
        }
      else
        emit_insn (gen_tls_local_dynamic_base_32 (base));

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      return gen_rtx_PLUS (Pmode, base, off);

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_64BIT)
        {
          pic = NULL;
          type = UNSPEC_GOTNTPOFF;
        }
      else if (flag_pic)
        {
          if (reload_in_progress)
            regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
          pic = pic_offset_table_rtx;
          type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
        }
      else if (!TARGET_GNU_TLS)
        {
          pic = gen_reg_rtx (Pmode);
          emit_insn (gen_set_got (pic));
          type = UNSPEC_GOTTPOFF;
        }
      else
        {
          pic = NULL;
          type = UNSPEC_INDNTPOFF;
        }

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (Pmode, off);
      if (pic)
        off = gen_rtx_PLUS (Pmode, pic, off);
      off = gen_const_mem (Pmode, off);
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_GNU_TLS)
        {
          base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
          off = force_reg (Pmode, off);
          return gen_rtx_PLUS (Pmode, base, off);
        }
      else
        {
          base = get_thread_pointer (true);
          dest = gen_reg_rtx (Pmode);
          emit_insn (gen_subsi3 (dest, base, off));
        }
      break;

    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
                            (TARGET_64BIT || TARGET_GNU_TLS)
                            ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_GNU_TLS)
        {
          base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
          return gen_rtx_PLUS (Pmode, base, off);
        }
      else
        {
          base = get_thread_pointer (true);
          dest = gen_reg_rtx (Pmode);
          emit_insn (gen_subsi3 (dest, base, off));
        }
      break;

    default:
      gcc_unreachable ();
    }

  return dest;
}
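
/* Illustration only: roughly how the TLS models handled above differ for
   a variable declared `__thread int t;' on 32-bit GNU TLS (instruction
   shapes are approximate).  Initial-exec loads t's offset from the GOT
   and adds the thread pointer; local-exec folds the offset into the
   address directly:

     initial-exec:  movl t@gotntpoff(%ebx), %eax
                    movl %gs:(%eax), %eax
     local-exec:    movl %gs:t@ntpoff, %eax  */
#if 0
static rtx
example_tls_local_exec (rtx sym)
{
  return legitimize_tls_address (sym, TLS_MODEL_LOCAL_EXEC, false);
}
#endif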
 
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

rtx
legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
{
  int changed = 0;
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
               GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, log, false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = 1;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
                        GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
          && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
          && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
        {
          changed = 1;
          log = INTVAL (XEXP (XEXP (x, 0), 1));
          XEXP (x, 0) = gen_rtx_MULT (Pmode,
                                      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
                                      GEN_INT (1 << log));
        }

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
          && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
          && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
        {
          changed = 1;
          log = INTVAL (XEXP (XEXP (x, 1), 1));
          XEXP (x, 1) = gen_rtx_MULT (Pmode,
                                      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
                                      GEN_INT (1 << log));
        }

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
        {
          rtx tmp = XEXP (x, 0);
          XEXP (x, 0) = XEXP (x, 1);
          XEXP (x, 1) = tmp;
          changed = 1;
        }

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
         into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
         created by virtual register instantiation, register elimination, and
         similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
        {
          changed = 1;
          x = gen_rtx_PLUS (Pmode,
                            gen_rtx_PLUS (Pmode, XEXP (x, 0),
                                          XEXP (XEXP (x, 1), 0)),
                            XEXP (XEXP (x, 1), 1));
        }

      /* Canonicalize
         (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
         into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
               && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
               && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
               && CONSTANT_P (XEXP (x, 1)))
        {
          rtx constant;
          rtx other = NULL_RTX;

          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            {
              constant = XEXP (x, 1);
              other = XEXP (XEXP (XEXP (x, 0), 1), 1);
            }
          else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
            {
              constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
              other = XEXP (x, 1);
            }
          else
            constant = 0;

          if (constant)
            {
              changed = 1;
              x = gen_rtx_PLUS (Pmode,
                                gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
                                              XEXP (XEXP (XEXP (x, 0), 1), 0)),
                                plus_constant (other, INTVAL (constant)));
            }
        }

      if (changed && legitimate_address_p (mode, x, FALSE))
        return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
        {
          changed = 1;
          XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
        }

      if (GET_CODE (XEXP (x, 1)) == MULT)
        {
          changed = 1;
          XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
        }

      if (changed
          && GET_CODE (XEXP (x, 1)) == REG
          && GET_CODE (XEXP (x, 0)) == REG)
        return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
        {
          changed = 1;
          x = legitimize_pic_address (x, 0);
        }

      if (changed && legitimate_address_p (mode, x, FALSE))
        return x;

      if (GET_CODE (XEXP (x, 0)) == REG)
        {
          rtx temp = gen_reg_rtx (Pmode);
          rtx val  = force_operand (XEXP (x, 1), temp);
          if (val != temp)
            emit_move_insn (temp, val);

          XEXP (x, 1) = temp;
          return x;
        }

      else if (GET_CODE (XEXP (x, 1)) == REG)
        {
          rtx temp = gen_reg_rtx (Pmode);
          rtx val  = force_operand (XEXP (x, 0), temp);
          if (val != temp)
            emit_move_insn (temp, val);

          XEXP (x, 0) = temp;
          return x;
        }
    }

  return x;
}
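
/* Illustration only: a sketch of the shift-to-multiply canonicalization
   performed above.  (plus (ashift reg1 2) reg0) is rewritten as
   (plus (mult reg1 4) reg0), which matches the hardware's scaled-index
   addressing form.  */
#if 0
static rtx
example_canonicalize (void)
{
  rtx x = gen_rtx_PLUS (Pmode,
                        gen_rtx_ASHIFT (Pmode, gen_rtx_REG (Pmode, 1),
                                        GEN_INT (2)),
                        gen_rtx_REG (Pmode, 0));
  return legitimize_address (x, x, SImode);
}
#endif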
 
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      gcc_assert (flag_pic);
      putc ('.', file);
      break;

    case SYMBOL_REF:
      assemble_name (file, XSTR (x, 0));
      if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
        fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
         but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
        {
          /* We can use %d if the number is <32 bits and positive.  */
          if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
            fprintf (file, "0x%lx%08lx",
                     (unsigned long) CONST_DOUBLE_HIGH (x),
                     (unsigned long) CONST_DOUBLE_LOW (x));
          else
            fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
        }
      else
        /* We can't handle floating point constants;
           PRINT_OPERAND must handle them.  */
        output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
        {
          output_pic_addr_const (file, XEXP (x, 0), code);
          putc ('+', file);
          output_pic_addr_const (file, XEXP (x, 1), code);
        }
      else
        {
          gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
          output_pic_addr_const (file, XEXP (x, 1), code);
          putc ('+', file);
          output_pic_addr_const (file, XEXP (x, 0), code);
        }
      break;

    case MINUS:
      if (!TARGET_MACHO)
        putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
        putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      gcc_assert (XVECLEN (x, 0) == 1);
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
        {
        case UNSPEC_GOT:
          fputs ("@GOT", file);
          break;
        case UNSPEC_GOTOFF:
          fputs ("@GOTOFF", file);
          break;
        case UNSPEC_GOTPCREL:
          fputs ("@GOTPCREL(%rip)", file);
          break;
        case UNSPEC_GOTTPOFF:
          /* FIXME: This might be @TPOFF in Sun ld too.  */
          fputs ("@GOTTPOFF", file);
          break;
        case UNSPEC_TPOFF:
          fputs ("@TPOFF", file);
          break;
        case UNSPEC_NTPOFF:
          if (TARGET_64BIT)
            fputs ("@TPOFF", file);
          else
            fputs ("@NTPOFF", file);
          break;
        case UNSPEC_DTPOFF:
          fputs ("@DTPOFF", file);
          break;
        case UNSPEC_GOTNTPOFF:
          if (TARGET_64BIT)
            fputs ("@GOTTPOFF(%rip)", file);
          else
            fputs ("@GOTNTPOFF", file);
          break;
        case UNSPEC_INDNTPOFF:
          fputs ("@INDNTPOFF", file);
          break;
        default:
          output_operand_lossage ("invalid UNSPEC as operand");
          break;
        }
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
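
/* Illustration only: a sketch of typical output from the routine above.
   For (const (unspec [foo] UNSPEC_GOTOFF)) it prints "foo@GOTOFF"; for
   UNSPEC_GOTPCREL it would print "foo@GOTPCREL(%rip)".  */
#if 0
static void
example_print_gotoff (FILE *f)
{
  rtx sym = gen_rtx_SYMBOL_REF (Pmode, "foo");
  rtx u = gen_rtx_CONST (Pmode,
                         gen_rtx_UNSPEC (Pmode, gen_rtvec (1, sym),
                                         UNSPEC_GOTOFF));
  output_pic_addr_const (f, u, 0);      /* prints "foo@GOTOFF" */
}
#endif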
 
/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void
i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@DTPOFF", file);
  switch (size)
    {
    case 4:
      break;
    case 8:
      fputs (", 0", file);
      break;
    default:
      gcc_unreachable ();
    }
}
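
/* Illustration only: assuming ASM_LONG expands to "\t.long\t", a 4-byte
   request for (symbol_ref "foo") emits "\t.long\tfoo@DTPOFF" and an
   8-byte request emits "\t.long\tfoo@DTPOFF, 0".  */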
 
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.  */

static rtx
ix86_delegitimize_address (rtx orig_x)
{
  rtx x = orig_x, y;

  if (GET_CODE (x) == MEM)
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST
          || GET_CODE (XEXP (x, 0)) != UNSPEC
          || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
          || GET_CODE (orig_x) != MEM)
        return orig_x;
      return XVECEXP (XEXP (x, 0), 0, 0);
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return orig_x;

  if (GET_CODE (XEXP (x, 0)) == REG
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
    /* %ebx + GOT/GOTOFF */
    y = NULL;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      y = XEXP (x, 0);
      if (GET_CODE (XEXP (y, 0)) == REG
          && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
        y = XEXP (y, 1);
      else if (GET_CODE (XEXP (y, 1)) == REG
               && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
        y = XEXP (y, 0);
      else
        return orig_x;
      if (GET_CODE (y) != REG
          && GET_CODE (y) != MULT
          && GET_CODE (y) != ASHIFT)
        return orig_x;
    }
  else
    return orig_x;

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
          || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
    {
      if (y)
        return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
      return XVECEXP (x, 0, 0);
    }

  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == UNSPEC
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
          || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
              && GET_CODE (orig_x) != MEM)))
    {
      x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
      if (y)
        return gen_rtx_PLUS (Pmode, y, x);
      return x;
    }

  return orig_x;
}
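
/* Illustration only: a sketch of the inverse transform performed above.
   Given the PIC register as base, (plus pic_reg (const (unspec [foo]
   UNSPEC_GOTOFF))) collapses back to the plain (symbol_ref "foo") so
   debug output can name the symbol directly.  */
#if 0
static rtx
example_delegitimize (rtx pic_reg)   /* must be the PIC register */
{
  rtx sym = gen_rtx_SYMBOL_REF (Pmode, "foo");
  rtx x = gen_rtx_PLUS (Pmode, pic_reg,
                        gen_rtx_CONST (Pmode,
                                       gen_rtx_UNSPEC (Pmode,
                                                       gen_rtvec (1, sym),
                                                       UNSPEC_GOTOFF)));
  return ix86_delegitimize_address (x);   /* => (symbol_ref "foo") */
}
#endif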
 
static void
put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
                    int fp, FILE *file)
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;
    case NE:
      suffix = "ne";
      break;
    case GT:
      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
         Those same assemblers have the same but opposite lossage on cmov.  */
      gcc_assert (mode == CCmode);
      suffix = fp ? "nbe" : "a";
      break;
    case LT:
      switch (mode)
        {
        case CCNOmode:
        case CCGOCmode:
          suffix = "s";
          break;

        case CCmode:
        case CCGCmode:
          suffix = "l";
          break;

        default:
          gcc_unreachable ();
        }
      break;
    case LTU:
      gcc_assert (mode == CCmode);
      suffix = "b";
      break;
    case GE:
      switch (mode)
        {
        case CCNOmode:
        case CCGOCmode:
          suffix = "ns";
          break;

        case CCmode:
        case CCGCmode:
          suffix = "ge";
          break;

        default:
          gcc_unreachable ();
        }
      break;
    case GEU:
      /* ??? As above.  */
      gcc_assert (mode == CCmode);
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
      suffix = "le";
      break;
    case LEU:
      gcc_assert (mode == CCmode);
      suffix = "be";
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      gcc_unreachable ();
    }
  fputs (suffix, file);
}
 
/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'h', pretend the reg is the 'high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.  */

void
print_reg (rtx x, int code, FILE *file)
{
  gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
              && REGNO (x) != FRAME_POINTER_REGNUM
              && REGNO (x) != FLAGS_REG
              && REGNO (x) != FPSR_REG);

  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use a different naming convention
     from the normal registers.  */
  if (REX_INT_REG_P (x))
    {
      gcc_assert (TARGET_64BIT);
      switch (code)
        {
          case 0:
            error ("extended registers have no high halves");
            break;
          case 1:
            fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
            break;
          case 2:
            fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
            break;
          case 4:
            fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
            break;
          case 8:
            fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
            break;
          default:
            error ("unsupported operand size for extended register");
            break;
        }
      return;
    }
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
        {
          fputs ("st(0)", file);
          break;
        }
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      if (! ANY_FP_REG_P (x))
        putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
    normal:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:
      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
        goto normal;
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:
      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
        goto normal;
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    default:
      gcc_unreachable ();
    }
}
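
/* Illustration only: sample outputs from print_reg for hard register 0
   (the %eax family) in AT&T syntax -- code 'b' prints "%al", 'w' prints
   "%ax", 'k' prints "%eax", 'h' prints "%ah", and 'q' prints "%rax"
   (64-bit only).  */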
 
/* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   pattern.  */

static const char *
get_some_local_dynamic_name (void)
{
  rtx insn;

  if (cfun->machine->some_ld_name)
    return cfun->machine->some_ld_name;

  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
    if (INSN_P (insn)
        && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
      return cfun->machine->some_ld_name;

  gcc_unreachable ();
}
 
static int
get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *px;

  if (GET_CODE (x) == SYMBOL_REF
      && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
    {
      cfun->machine->some_ld_name = XSTR (x, 0);
      return 1;
    }

  return 0;
}
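
/* The pair of functions above is the standard for_each_rtx walker idiom:
   the callback inspects one subexpression at a time and returns nonzero
   to stop the walk early.  A minimal sketch of the same idiom
   (illustration only), counting SYMBOL_REFs in a pattern:  */
#if 0
static int
count_symbol_refs_1 (rtx *px, void *data)
{
  if (GET_CODE (*px) == SYMBOL_REF)
    ++*(int *) data;
  return 0;                     /* zero: keep walking */
}

static int
count_symbol_refs (rtx pat)
{
  int n = 0;
  for_each_rtx (&pat, count_symbol_refs_1, &n);
  return n;
}
#endif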
 
7049
/* Meaning of CODE:
7050
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7051
   C -- print opcode suffix for set/cmov insn.
7052
   c -- like C, but print reversed condition
7053
   F,f -- likewise, but for floating-point.
7054
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7055
        otherwise nothing
7056
   R -- print the prefix for register names.
7057
   z -- print the opcode suffix for the size of the current operand.
7058
   * -- print a star (in certain assembler syntax)
7059
   A -- print an absolute memory reference.
7060
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
7061
   s -- print a shift double count, followed by the assemblers argument
7062
        delimiter.
7063
   b -- print the QImode name of the register for the indicated operand.
7064
        %b0 would print %al if operands[0] is reg 0.
7065
   w --  likewise, print the HImode name of the register.
7066
   k --  likewise, print the SImode name of the register.
7067
   q --  likewise, print the DImode name of the register.
7068
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7069
   y -- print "st(0)" instead of "st" as a register.
7070
   D -- print condition for SSE cmp instruction.
7071
   P -- if PIC, print an @PLT suffix.
7072
   X -- don't print any sort of PIC '@' suffix for a symbol.
7073
   & -- print some in-use local-dynamic symbol name.
7074
   H -- print a memory address offset by 8; used for sse high-parts
7075
 */
7076
 
7077
void
7078
print_operand (FILE *file, rtx x, int code)
7079
{
7080
  if (code)
7081
    {
7082
      switch (code)
7083
        {
7084
        case '*':
7085
          if (ASSEMBLER_DIALECT == ASM_ATT)
7086
            putc ('*', file);
7087
          return;
7088
 
7089
        case '&':
7090
          assemble_name (file, get_some_local_dynamic_name ());
7091
          return;
7092
 
7093
        case 'A':
7094
          switch (ASSEMBLER_DIALECT)
7095
            {
7096
            case ASM_ATT:
7097
              putc ('*', file);
7098
              break;
7099
 
7100
            case ASM_INTEL:
7101
              /* Intel syntax. For absolute addresses, registers should not
7102
                 be surrounded by braces.  */
              if (GET_CODE (x) != REG)
                {
                  putc ('[', file);
                  PRINT_OPERAND (file, x, 0);
                  putc (']', file);
                  return;
                }
              break;

            default:
              gcc_unreachable ();
            }

          PRINT_OPERAND (file, x, 0);
          return;


        case 'L':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('l', file);
          return;

        case 'W':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('w', file);
          return;

        case 'B':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('b', file);
          return;

        case 'Q':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('l', file);
          return;

        case 'S':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('s', file);
          return;

        case 'T':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('t', file);
          return;

        case 'z':
          /* 387 opcodes don't get size suffixes if the operands are
             registers.  */
          if (STACK_REG_P (x))
            return;

          /* Likewise if using Intel opcodes.  */
          if (ASSEMBLER_DIALECT == ASM_INTEL)
            return;

          /* Derive the opcode suffix from the size of the operand.  */
          switch (GET_MODE_SIZE (GET_MODE (x)))
            {
            case 2:
#ifdef HAVE_GAS_FILDS_FISTS
              putc ('s', file);
#endif
              return;

            case 4:
              if (GET_MODE (x) == SFmode)
                {
                  putc ('s', file);
                  return;
                }
              else
                putc ('l', file);
              return;

            case 12:
            case 16:
              putc ('t', file);
              return;

            case 8:
              if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
                {
#ifdef GAS_MNEMONICS
                  putc ('q', file);
#else
                  putc ('l', file);
                  putc ('l', file);
#endif
                }
              else
                putc ('l', file);
              return;

            default:
              gcc_unreachable ();
            }

        case 'b':
        case 'w':
        case 'k':
        case 'q':
        case 'h':
        case 'y':
        case 'X':
        case 'P':
          break;

        case 's':
          if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
            {
              PRINT_OPERAND (file, x, 0);
              putc (',', file);
            }
          return;

        case 'D':
          /* A bit of brain damage here.  The SSE compare instructions
             use completely different names for the comparisons than the
             fp conditional moves do.  */
          switch (GET_CODE (x))
            {
            case EQ:
            case UNEQ:
              fputs ("eq", file);
              break;
            case LT:
            case UNLT:
              fputs ("lt", file);
              break;
            case LE:
            case UNLE:
              fputs ("le", file);
              break;
            case UNORDERED:
              fputs ("unord", file);
              break;
            case NE:
            case LTGT:
              fputs ("neq", file);
              break;
            case UNGE:
            case GE:
              fputs ("nlt", file);
              break;
            case UNGT:
            case GT:
              fputs ("nle", file);
              break;
            case ORDERED:
              fputs ("ord", file);
              break;
            default:
              gcc_unreachable ();
            }
          return;
        case 'O':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
          if (ASSEMBLER_DIALECT == ASM_ATT)
            {
              switch (GET_MODE (x))
                {
                case HImode: putc ('w', file); break;
                case SImode:
                case SFmode: putc ('l', file); break;
                case DImode:
                case DFmode: putc ('q', file); break;
                default: gcc_unreachable ();
                }
              putc ('.', file);
            }
#endif
          return;
        case 'C':
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
          return;
        case 'F':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('.', file);
#endif
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
          return;

          /* Like above, but reverse condition */
        case 'c':
          /* Check to see if argument to %c is really a constant
             and not a condition code which needs to be reversed.  */
          if (!COMPARISON_P (x))
            {
              output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
              return;
            }
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
          return;
        case 'f':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('.', file);
#endif
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
          return;

        case 'H':
          /* It doesn't actually matter what mode we use here, as we're
             only going to use this for printing.  */
          x = adjust_address_nv (x, DImode, 8);
          break;

        case '+':
          {
            rtx x;

            if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
              return;

            x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
            if (x)
              {
                int pred_val = INTVAL (XEXP (x, 0));

                if (pred_val < REG_BR_PROB_BASE * 45 / 100
                    || pred_val > REG_BR_PROB_BASE * 55 / 100)
                  {
                    int taken = pred_val > REG_BR_PROB_BASE / 2;
                    int cputaken = final_forward_branch_p (current_output_insn) == 0;

                    /* Emit hints only where the default branch prediction
                       heuristics would fail.  */
                    if (taken != cputaken)
                      {
                        /* We use 3e (DS) prefix for taken branches and
                           2e (CS) prefix for not taken branches.  */
                        if (taken)
                          fputs ("ds ; ", file);
                        else
                          fputs ("cs ; ", file);
                      }
                  }
              }
            return;
          }
        default:
          output_operand_lossage ("invalid operand code '%c'", code);
        }
    }

  if (GET_CODE (x) == REG)
    print_reg (x, code, file);

  else if (GET_CODE (x) == MEM)
    {
      /* No `byte ptr' prefix for call instructions.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
        {
          const char * size;
          switch (GET_MODE_SIZE (GET_MODE (x)))
            {
            case 1: size = "BYTE"; break;
            case 2: size = "WORD"; break;
            case 4: size = "DWORD"; break;
            case 8: size = "QWORD"; break;
            case 12: size = "XWORD"; break;
            case 16: size = "XMMWORD"; break;
            default:
              gcc_unreachable ();
            }

          /* Check for explicit size override (codes 'b', 'w' and 'k')  */
          if (code == 'b')
            size = "BYTE";
          else if (code == 'w')
            size = "WORD";
          else if (code == 'k')
            size = "DWORD";

          fputs (size, file);
          fputs (" PTR ", file);
        }

      x = XEXP (x, 0);
      /* Avoid (%rip) for call operands.  */
      if (CONSTANT_ADDRESS_P (x) && code == 'P'
          && GET_CODE (x) != CONST_INT)
        output_addr_const (file, x);
      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
        output_operand_lossage ("invalid constraints for operand");
      else
        output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
        putc ('$', file);
      fprintf (file, "0x%08lx", l);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fprintf (file, "%s", dstr);
    }

  else if (GET_CODE (x) == CONST_DOUBLE
           && GET_MODE (x) == XFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fprintf (file, "%s", dstr);
    }

  else
    {
      /* We have patterns that allow zero sets of memory, for instance.
         In 64-bit mode, we should probably support all 8-byte vectors,
         since we can in fact encode that into an immediate.  */
      if (GET_CODE (x) == CONST_VECTOR)
        {
          gcc_assert (x == CONST0_RTX (GET_MODE (x)));
          x = const0_rtx;
        }

      if (code != 'P')
        {
          if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
            {
              if (ASSEMBLER_DIALECT == ASM_ATT)
                putc ('$', file);
            }
          else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
                   || GET_CODE (x) == LABEL_REF)
            {
              if (ASSEMBLER_DIALECT == ASM_ATT)
                putc ('$', file);
              else
                fputs ("OFFSET FLAT:", file);
            }
        }
      if (GET_CODE (x) == CONST_INT)
        fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic)
        output_pic_addr_const (file, x, code);
      else
        output_addr_const (file, x);
    }
}

/* Print a memory operand whose address is ADDR.  */

void
print_operand_address (FILE *file, rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;
  int ok = ix86_decompose_address (addr, &parts);

  gcc_assert (ok);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  switch (parts.seg)
    {
    case SEG_DEFAULT:
      break;
    case SEG_FS:
    case SEG_GS:
      if (USER_LABEL_PREFIX[0] == 0)
        putc ('%', file);
      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
      break;
    default:
      gcc_unreachable ();
    }
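
  /* What follows prints the address proper: AT&T syntax renders it as
     "disp(base,index,scale)", while Intel syntax prints any symbolic
     part first and then "[base+offset+index*scale]".  */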

  if (!base && !index)
    {
      /* Displacement-only addresses require special attention.  */

      if (GET_CODE (disp) == CONST_INT)
        {
          if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
            {
              if (USER_LABEL_PREFIX[0] == 0)
                putc ('%', file);
              fputs ("ds:", file);
            }
          fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
        }
      else if (flag_pic)
        output_pic_addr_const (file, disp, 0);
      else
        output_addr_const (file, disp);

      /* Use the one byte shorter RIP-relative addressing for 64-bit mode.  */
      if (TARGET_64BIT)
        {
          if (GET_CODE (disp) == CONST
              && GET_CODE (XEXP (disp, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
            disp = XEXP (XEXP (disp, 0), 0);
          if (GET_CODE (disp) == LABEL_REF
              || (GET_CODE (disp) == SYMBOL_REF
                  && SYMBOL_REF_TLS_MODEL (disp) == 0))
            fputs ("(%rip)", file);
        }
    }
  else
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
        {
          if (disp)
            {
              if (flag_pic)
                output_pic_addr_const (file, disp, 0);
              else if (GET_CODE (disp) == LABEL_REF)
                output_asm_label (disp);
              else
                output_addr_const (file, disp);
            }

          putc ('(', file);
          if (base)
            print_reg (base, 0, file);
          if (index)
            {
              putc (',', file);
              print_reg (index, 0, file);
              if (scale != 1)
                fprintf (file, ",%d", scale);
            }
          putc (')', file);
        }
      else
        {
          rtx offset = NULL_RTX;

          if (disp)
            {
              /* Pull out the offset of a symbol; print any symbol itself.  */
              if (GET_CODE (disp) == CONST
                  && GET_CODE (XEXP (disp, 0)) == PLUS
                  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
                {
                  offset = XEXP (XEXP (disp, 0), 1);
                  disp = gen_rtx_CONST (VOIDmode,
                                        XEXP (XEXP (disp, 0), 0));
                }

              if (flag_pic)
                output_pic_addr_const (file, disp, 0);
              else if (GET_CODE (disp) == LABEL_REF)
                output_asm_label (disp);
              else if (GET_CODE (disp) == CONST_INT)
                offset = disp;
              else
                output_addr_const (file, disp);
            }

          putc ('[', file);
          if (base)
            {
              print_reg (base, 0, file);
              if (offset)
                {
                  if (INTVAL (offset) >= 0)
                    putc ('+', file);
                  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
                }
            }
          else if (offset)
            fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
          else
            putc ('0', file);

          if (index)
            {
              putc ('+', file);
              print_reg (index, 0, file);
              if (scale != 1)
                fprintf (file, "*%d", scale);
            }
          putc (']', file);
        }
    }
}

bool
output_addr_const_extra (FILE *file, rtx x)
{
  rtx op;

  if (GET_CODE (x) != UNSPEC)
    return false;

  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.  */
      fputs ("@GOTTPOFF", file);
      break;
    case UNSPEC_TPOFF:
      output_addr_const (file, op);
      fputs ("@TPOFF", file);
      break;
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
        fputs ("@TPOFF", file);
      else
        fputs ("@NTPOFF", file);
      break;
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@DTPOFF", file);
      break;
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
        fputs ("@GOTTPOFF(%rip)", file);
      else
        fputs ("@GOTNTPOFF", file);
      break;
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs ("@INDNTPOFF", file);
      break;

    default:
      return false;
    }

  return true;
}

/* Split one or more DImode RTL references into pairs of SImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */
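/* For instance, a DImode value holding 0x0000000100000002 splits into
   an SImode lo_half of 2 and an SImode hi_half of 1 (little endian).  */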

void
split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
{
  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses,
         but we still have to handle them.  */
      if (GET_CODE (op) == MEM)
        {
          lo_half[num] = adjust_address (op, SImode, 0);
          hi_half[num] = adjust_address (op, SImode, 4);
        }
      else
        {
          lo_half[num] = simplify_gen_subreg (SImode, op,
                                              GET_MODE (op) == VOIDmode
                                              ? DImode : GET_MODE (op), 0);
          hi_half[num] = simplify_gen_subreg (SImode, op,
                                              GET_MODE (op) == VOIDmode
                                              ? DImode : GET_MODE (op), 4);
        }
    }
}

/* Split one or more TImode RTL references into pairs of DImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
{
  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses, but we
         still have to handle them.  */
      if (GET_CODE (op) == MEM)
        {
          lo_half[num] = adjust_address (op, DImode, 0);
          hi_half[num] = adjust_address (op, DImode, 8);
        }
      else
        {
          lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
          hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
        }
    }
}

/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif

const char *
output_387_binary_op (rtx insn, rtx *operands)
{
  static char buf[30];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints, which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
           && REGNO (operands[0]) == REGNO (operands[1])
           && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
          || (REG_P (operands[2])
              && REGNO (operands[0]) == REGNO (operands[2])
              && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else
    gcc_assert (is_sse);
#endif

  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fiadd";
      else
        p = "fadd";
      ssep = "add";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fisub";
      else
        p = "fsub";
      ssep = "sub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fimul";
      else
        p = "fmul";
      ssep = "mul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fidiv";
      else
        p = "fdiv";
      ssep = "div";
      break;

    default:
      gcc_unreachable ();
    }

  if (is_sse)
    {
      strcpy (buf, ssep);
      if (GET_MODE (operands[0]) == SFmode)
        strcat (buf, "ss\t{%2, %0|%0, %2}");
      else
        strcat (buf, "sd\t{%2, %0|%0, %2}");
      return buf;
    }
  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
        {
          rtx temp = operands[2];
          operands[2] = operands[1];
          operands[1] = temp;
        }

      /* We know operands[0] == operands[1].  */

      if (GET_CODE (operands[2]) == MEM)
        {
          p = "%z2\t%2";
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
        {
          if (STACK_TOP_P (operands[0]))
            /* How is it that we are storing to a dead operand[2]?
               Well, presumably operands[1] is dead too.  We can't
               store the result to st(0) as st(0) gets popped on this
               instruction.  Instead store to operands[2] (which I
               think has to be st(1)).  st(1) will be popped later.
               gcc <= 2.8.1 didn't have this check and generated
               assembly code that the Unixware assembler rejected.  */
            p = "p\t{%0, %2|%2, %0}";   /* st(1) = st(0) op st(1); pop */
          else
            p = "p\t{%2, %0|%0, %2}";   /* st(r1) = st(r1) op st(0); pop */
          break;
        }

      if (STACK_TOP_P (operands[0]))
        p = "\t{%y2, %0|%0, %y2}";      /* st(0) = st(0) op st(r2) */
      else
        p = "\t{%2, %0|%0, %2}";        /* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      if (GET_CODE (operands[1]) == MEM)
        {
          p = "r%z1\t%1";
          break;
        }

      if (GET_CODE (operands[2]) == MEM)
        {
          p = "%z2\t%2";
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
        {
#if SYSV386_COMPAT
          /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
             derived assemblers, confusingly reverse the direction of
             the operation for fsub{r} and fdiv{r} when the
             destination register is not st(0).  The Intel assembler
             doesn't have this brain damage.  Read !SYSV386_COMPAT to
             figure out what the hardware really does.  */
          if (STACK_TOP_P (operands[0]))
            p = "{p\t%0, %2|rp\t%2, %0}";
          else
            p = "{rp\t%2, %0|p\t%0, %2}";
#else
          if (STACK_TOP_P (operands[0]))
            /* As above for fmul/fadd, we can't store to st(0).  */
            p = "rp\t{%0, %2|%2, %0}";  /* st(1) = st(0) op st(1); pop */
          else
            p = "p\t{%2, %0|%0, %2}";   /* st(r1) = st(r1) op st(0); pop */
#endif
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
        {
#if SYSV386_COMPAT
          if (STACK_TOP_P (operands[0]))
            p = "{rp\t%0, %1|p\t%1, %0}";
          else
            p = "{p\t%1, %0|rp\t%0, %1}";
#else
          if (STACK_TOP_P (operands[0]))
            p = "p\t{%0, %1|%1, %0}";   /* st(1) = st(1) op st(0); pop */
          else
            p = "rp\t{%1, %0|%0, %1}";  /* st(r2) = st(0) op st(r2); pop */
#endif
          break;
        }

      if (STACK_TOP_P (operands[0]))
        {
          if (STACK_TOP_P (operands[1]))
            p = "\t{%y2, %0|%0, %y2}";  /* st(0) = st(0) op st(r2) */
          else
            p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
          break;
        }
      else if (STACK_TOP_P (operands[1]))
        {
#if SYSV386_COMPAT
          p = "{\t%1, %0|r\t%0, %1}";
#else
          p = "r\t{%1, %0|%0, %1}";     /* st(r2) = st(0) op st(r2) */
#endif
        }
      else
        {
#if SYSV386_COMPAT
          p = "{r\t%2, %0|\t%0, %2}";
#else
          p = "\t{%2, %0|%0, %2}";      /* st(r1) = st(r1) op st(0) */
#endif
        }
      break;

    default:
      gcc_unreachable ();
    }

  strcat (buf, p);
  return buf;
}

/* Return the mode needed for ENTITY in the optimize_mode_switching pass.  */

int
ix86_mode_needed (int entity, rtx insn)
{
  enum attr_i387_cw mode;

  /* The mode UNINITIALIZED is used to store the control word after a
     function call or ASM pattern.  The mode ANY specifies that the insn
     has no requirements on the control word and makes no changes in the
     bits we are interested in.  */

  if (CALL_P (insn)
      || (NONJUMP_INSN_P (insn)
          && (asm_noperands (PATTERN (insn)) >= 0
              || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
    return I387_CW_UNINITIALIZED;

  if (recog_memoized (insn) < 0)
    return I387_CW_ANY;

  mode = get_attr_i387_cw (insn);

  switch (entity)
    {
    case I387_TRUNC:
      if (mode == I387_CW_TRUNC)
        return mode;
      break;

    case I387_FLOOR:
      if (mode == I387_CW_FLOOR)
        return mode;
      break;

    case I387_CEIL:
      if (mode == I387_CW_CEIL)
        return mode;
      break;

    case I387_MASK_PM:
      if (mode == I387_CW_MASK_PM)
        return mode;
      break;

    default:
      gcc_unreachable ();
    }

  return I387_CW_ANY;
}

/* Output code to initialize the control word copies used by the
   trunc?f?i and rounding patterns.  MODE selects which control word
   contents to set up: truncation, floor, ceil, or precision masking.  */

void
emit_i387_cw_initialization (int mode)
{
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  rtx new_mode;

  int slot;

  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, stored_mode);
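
  /* Bits 10 and 11 of the 387 control word select the rounding mode
     (00 = nearest, 01 = down, 10 = up, 11 = toward zero), and bit 5
     masks the precision exception; the constants below set exactly
     those bits.  */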

  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
    {
      switch (mode)
        {
        case I387_CW_TRUNC:
          /* round toward zero (truncate) */
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
          slot = SLOT_CW_TRUNC;
          break;

        case I387_CW_FLOOR:
          /* round down toward -oo */
          emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
          slot = SLOT_CW_FLOOR;
          break;

        case I387_CW_CEIL:
          /* round up toward +oo */
          emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
          slot = SLOT_CW_CEIL;
          break;

        case I387_CW_MASK_PM:
          /* mask precision exception for nearbyint() */
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
          slot = SLOT_CW_MASK_PM;
          break;

        default:
          gcc_unreachable ();
        }
    }
  else
    {
      switch (mode)
        {
        case I387_CW_TRUNC:
          /* round toward zero (truncate) */
          emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
          slot = SLOT_CW_TRUNC;
          break;

        case I387_CW_FLOOR:
          /* round down toward -oo */
          emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
          slot = SLOT_CW_FLOOR;
          break;

        case I387_CW_CEIL:
          /* round up toward +oo */
          emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
          slot = SLOT_CW_CEIL;
          break;

        case I387_CW_MASK_PM:
          /* mask precision exception for nearbyint() */
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
          slot = SLOT_CW_MASK_PM;
          break;

        default:
          gcc_unreachable ();
        }
    }

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
}

/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */
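/* Note: when FISTTP is nonzero we can use the SSE3 fisttp instruction,
   which always truncates, so none of the control-word shuffling below
   is needed in that case.  */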

const char *
output_fix_trunc (rtx insn, rtx *operands, int fisttp)
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (GET_CODE (operands[0]) == MEM);

  if (fisttp)
    output_asm_insn ("fisttp%z0\t%0", operands);
  else
    {
      if (round_mode != I387_CW_ANY)
        output_asm_insn ("fldcw\t%3", operands);
      if (stack_top_dies || dimode_p)
        output_asm_insn ("fistp%z0\t%0", operands);
      else
        output_asm_insn ("fist%z0\t%0", operands);
      if (round_mode != I387_CW_ANY)
        output_asm_insn ("fldcw\t%2", operands);
    }

  return "";
}

/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.  */

const char *
output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0, cmp_op1;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);

  if (eflags_p)
    {
      cmp_op0 = operands[0];
      cmp_op1 = operands[1];
    }
  else
    {
      cmp_op0 = operands[1];
      cmp_op1 = operands[2];
    }

  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
        if (unordered_p)
          return "ucomiss\t{%1, %0|%0, %1}";
        else
          return "comiss\t{%1, %0|%0, %1}";
      else
        if (unordered_p)
          return "ucomisd\t{%1, %0|%0, %1}";
        else
          return "comisd\t{%1, %0|%0, %1}";
    }

  gcc_assert (STACK_TOP_P (cmp_op0));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
    {
      if (stack_top_dies)
        {
          output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
          return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
        }
      else
        return "ftst\n\tfnstsw\t%0";
    }

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If the top of the 387 stack dies, and the other operand is
         also a stack register that dies, then this must be a
         `fcompp' float compare.  */

      if (eflags_p)
        {
          /* There is no double popping fcomi variant.  Fortunately,
             eflags is immune from the fstp's cc clobbering.  */
          if (unordered_p)
            output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
          else
            output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
          return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
        }
      else
        {
          if (unordered_p)
            return "fucompp\n\tfnstsw\t%0";
          else
            return "fcompp\n\tfnstsw\t%0";
        }
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[16] =
      {
        "fcom%z2\t%y2\n\tfnstsw\t%0",
        "fcomp%z2\t%y2\n\tfnstsw\t%0",
        "fucom%z2\t%y2\n\tfnstsw\t%0",
        "fucomp%z2\t%y2\n\tfnstsw\t%0",

        "ficom%z2\t%y2\n\tfnstsw\t%0",
        "ficomp%z2\t%y2\n\tfnstsw\t%0",
        NULL,
        NULL,

        "fcomi\t{%y1, %0|%0, %y1}",
        "fcomip\t{%y1, %0|%0, %y1}",
        "fucomi\t{%y1, %0|%0, %y1}",
        "fucomip\t{%y1, %0|%0, %y1}",

        NULL,
        NULL,
        NULL,
        NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;
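      /* For example, eflags_p = 1 with unordered_p = 1 and a dying
         stack top gives mask 0b1011 = 11, which selects the "fucomip"
         entry above.  */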

      gcc_assert (mask < 16);
      ret = alt[mask];
      gcc_assert (ret);

      return ret;
    }
}

void
ix86_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}

void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  if (TARGET_64BIT)
    fprintf (file, "%s%s%d-%s%d\n",
             ASM_LONG, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
      machopic_output_function_base_name (file);
      fprintf (file, "\n");
    }
#endif
  else
    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
                 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
}

/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */
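/* For instance, clearing %eax as "xorl %eax, %eax" takes 2 bytes while
   "movl $0, %eax" takes 5; the xor form also clobbers the flags, hence
   the explicit CLOBBER below.  */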

void
ix86_expand_clear (rtx dest)
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  gcc_assert (reload_completed);

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));

  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
    {
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}

/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

rtx
maybe_get_pool_constant (rtx x)
{
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}

void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  int strict = (reload_in_progress || reload_completed);
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  if (GET_CODE (op1) == SYMBOL_REF)
    {
      model = SYMBOL_REF_TLS_MODEL (op1);
      if (model)
        {
          op1 = legitimize_tls_address (op1, model, true);
          op1 = force_operand (op1, op0);
          if (op1 == op0)
            return;
        }
    }
  else if (GET_CODE (op1) == CONST
           && GET_CODE (XEXP (op1, 0)) == PLUS
           && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
    {
      model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
      if (model)
        {
          rtx addend = XEXP (XEXP (op1, 0), 1);
          op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
          op1 = force_operand (op1, NULL);
          op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
                                     op0, 1, OPTAB_DIRECT);
          if (op1 == op0)
            return;
        }
    }

  if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
    {
#if TARGET_MACHO
      if (MACHOPIC_PURE)
        {
          rtx temp = ((reload_in_progress
                       || ((op0 && GET_CODE (op0) == REG)
                           && mode == Pmode))
                      ? op0 : gen_reg_rtx (Pmode));
          op1 = machopic_indirect_data_reference (op1, temp);
          op1 = machopic_legitimize_pic_address (op1, mode,
                                                 temp == op1 ? 0 : temp);
        }
      else if (MACHOPIC_INDIRECT)
        op1 = machopic_indirect_data_reference (op1, 0);
      if (op0 == op1)
        return;
#else
      if (GET_CODE (op0) == MEM)
        op1 = force_reg (Pmode, op1);
      else
        op1 = legitimize_address (op1, op1, Pmode);
#endif /* TARGET_MACHO */
    }
  else
    {
      if (GET_CODE (op0) == MEM
          && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
              || !push_operand (op0, mode))
          && GET_CODE (op1) == MEM)
        op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
          && ! general_no_elim_operand (op1, mode))
        op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64-bit compilation into a register
         to get them CSEd.  */
      if (TARGET_64BIT && mode == DImode
          && immediate_operand (op1, mode)
          && !x86_64_zext_immediate_operand (op1, VOIDmode)
          && !register_operand (op0, mode)
          && optimize && !reload_completed && !reload_in_progress)
        op1 = copy_to_mode_reg (mode, op1);

      if (FLOAT_MODE_P (mode))
        {
          /* If we are loading a floating point constant to a register,
             force the value to memory now, since we'll get better code
             out of the back end.  */

          if (strict)
            ;
          else if (GET_CODE (op1) == CONST_DOUBLE)
            {
              op1 = validize_mem (force_const_mem (mode, op1));
              if (!register_operand (op0, mode))
                {
                  rtx temp = gen_reg_rtx (mode);
                  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
                  emit_move_insn (op0, temp);
                  return;
                }
            }
        }
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}

void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0], op1 = operands[1];

  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register; once we have that information we may be able
     to handle some of them more efficiently.  */
  if ((reload_in_progress | reload_completed) == 0
      && register_operand (op0, mode)
      && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
    op1 = validize_mem (force_const_mem (mode, op1));

  /* Make operand1 a register if it isn't already.  */
  if (!no_new_pseudos
      && !register_operand (op0, mode)
      && !register_operand (op1, mode))
    {
      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}

/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
   straight to ix86_expand_vector_move.  */

void
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1, m;

  op0 = operands[0];
  op1 = operands[1];

  if (MEM_P (op1))
    {
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_size)
        {
          op0 = gen_lowpart (V4SFmode, op0);
          op1 = gen_lowpart (V4SFmode, op1);
          emit_insn (gen_sse_movups (op0, op1));
          return;
        }

      /* ??? If we have typed data, then it would appear that using
         movdqu is the only way to get unaligned data loaded with
         integer type.  */
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        {
          op0 = gen_lowpart (V16QImode, op0);
          op1 = gen_lowpart (V16QImode, op1);
          emit_insn (gen_sse2_movdqu (op0, op1));
          return;
        }

      if (TARGET_SSE2 && mode == V2DFmode)
        {
          rtx zero;

          /* When SSE registers are split into halves, we can avoid
             writing to the top half twice.  */
          if (TARGET_SSE_SPLIT_REGS)
            {
              emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
              zero = op0;
            }
          else
            {
              /* ??? Not sure about the best option for the Intel chips.
                 The following would seem to satisfy; the register is
                 entirely cleared, breaking the dependency chain.  We
                 then store to the upper half, with a dependency depth
                 of one.  A rumor has it that Intel recommends two movsd
                 followed by an unpacklpd, but this is unconfirmed.  And
                 given that the dependency depth of the unpacklpd would
                 still be one, I'm not sure why this would be better.  */
              zero = CONST0_RTX (V2DFmode);
            }

          m = adjust_address (op1, DFmode, 0);
          emit_insn (gen_sse2_loadlpd (op0, zero, m));
          m = adjust_address (op1, DFmode, 8);
          emit_insn (gen_sse2_loadhpd (op0, op0, m));
        }
      else
        {
          if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
            emit_move_insn (op0, CONST0_RTX (mode));
          else
            emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));

          if (mode != V4SFmode)
            op0 = gen_lowpart (V4SFmode, op0);
          m = adjust_address (op1, V2SFmode, 0);
          emit_insn (gen_sse_loadlps (op0, op0, m));
          m = adjust_address (op1, V2SFmode, 8);
          emit_insn (gen_sse_loadhps (op0, op0, m));
        }
    }
  else if (MEM_P (op0))
    {
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_size)
        {
          op0 = gen_lowpart (V4SFmode, op0);
          op1 = gen_lowpart (V4SFmode, op1);
          emit_insn (gen_sse_movups (op0, op1));
          return;
        }

      /* ??? Similar to above, only less clear because of quote
         typeless stores unquote.  */
      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
          && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        {
          op0 = gen_lowpart (V16QImode, op0);
          op1 = gen_lowpart (V16QImode, op1);
          emit_insn (gen_sse2_movdqu (op0, op1));
          return;
        }

      if (TARGET_SSE2 && mode == V2DFmode)
        {
          m = adjust_address (op0, DFmode, 0);
          emit_insn (gen_sse2_storelpd (m, op1));
          m = adjust_address (op0, DFmode, 8);
          emit_insn (gen_sse2_storehpd (m, op1));
        }
      else
        {
          if (mode != V4SFmode)
            op1 = gen_lowpart (V4SFmode, op1);
          m = adjust_address (op0, V2SFmode, 0);
          emit_insn (gen_sse_storelps (m, op1));
          m = adjust_address (op0, V2SFmode, 8);
          emit_insn (gen_sse_storehps (m, op1));
        }
    }
  else
    gcc_unreachable ();
}

/* Expand a push in MODE.  This is some mode for which we do not support
   proper push instructions, at least from the registers that we expect
   the value to live in.  */
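/* In other words, the push is open-coded: decrement the stack pointer
   by the size of MODE, then store the value at the new stack top.  */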

void
ix86_expand_push (enum machine_mode mode, rtx x)
{
  rtx tmp;

  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
                             GEN_INT (-GET_MODE_SIZE (mode)),
                             stack_pointer_rtx, 1, OPTAB_DIRECT);
  if (tmp != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, tmp);

  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
  emit_move_insn (tmp, x);
}

/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
   destination to use for the operation.  If different from the true
   destination in operands[0], a copy operation will be required.  */
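/* The constraints being satisfied are those of two-address x86
   arithmetic: at most one operand in memory, and the destination
   matching one of the sources.  */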

rtx
ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
                            rtx operands[])
{
  int matching_memory;
  rtx src1, src2, dst;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators.  */
  if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
      && (rtx_equal_p (dst, src2)
          || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
        matching_memory = 1;
      else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
               && rtx_equal_p (dst, src2))
        matching_memory = 2;
      else
        dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      if (matching_memory != 2)
        src2 = force_reg (mode, src2);
      else
        src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutative, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    src1 = force_reg (mode, src1);

  src1 = operands[1] = src1;
  src2 = operands[2] = src2;
  return dst;
}

/* Similarly, but assume that the destination has already been
   set up properly.  */

void
ix86_fixup_binary_operands_no_copy (enum rtx_code code,
                                    enum machine_mode mode, rtx operands[])
{
  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
  gcc_assert (dst == operands[0]);
}

/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine than just general_operand, which would allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
                             rtx operands[])
{
  rtx src1, src2, dst, op, clob;

  dst = ix86_fixup_binary_operands (code, mode, operands);
  src1 = operands[1];
  src2 = operands[2];

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  We can only do this with PLUS.  */
      gcc_assert (code == PLUS);
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

int
ix86_binary_operator_ok (enum rtx_code code,
                         enum machine_mode mode ATTRIBUTE_UNUSED,
                         rtx operands[3])
{
  /* Both source operands cannot be in memory.  */
  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
    return 0;
  /* If the operation is not commutative, source 1 cannot be a constant.  */
  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    return 0;
  /* If the destination is memory, we must have a matching source operand.  */
  if (GET_CODE (operands[0]) == MEM
      && ! (rtx_equal_p (operands[0], operands[1])
            || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
                && rtx_equal_p (operands[0], operands[2]))))
    return 0;
  /* If the operation is not commutative and source 1 is memory, we must
     have a matching destination.  */
  if (GET_CODE (operands[1]) == MEM
      && GET_RTX_CLASS (code) != RTX_COMM_ARITH
      && ! rtx_equal_p (operands[0], operands[1]))
    return 0;
  return 1;
}

/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine than just general_operand, which would allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
                            rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src))
        matching_memory = 1;
      else
        dst = gen_reg_rtx (mode);
    }

  /* When the source operand is memory, the destination must match.  */
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  */
      gcc_assert (code == NOT);
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

int
ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
                        enum machine_mode mode ATTRIBUTE_UNUSED,
                        rtx operands[2] ATTRIBUTE_UNUSED)
{
  /* If one of the operands is memory, source and destination must match.  */
  if ((GET_CODE (operands[0]) == MEM
       || GET_CODE (operands[1]) == MEM)
      && ! rtx_equal_p (operands[0], operands[1]))
    return FALSE;
  return TRUE;
}

/* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
   Create a mask for the sign bit in MODE for an SSE register.  If VECT is
   true, then replicate the mask for all elements of the vector register.
   If INVERT is true, then create a mask excluding the sign bit.  */
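/* Concretely: for DFmode the mask is 0x8000000000000000, or its
   complement 0x7fffffffffffffff when INVERT; for SFmode it is
   0x80000000 (0x7fffffff inverted).  */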
8790
 
8791
rtx
8792
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
8793
{
8794
  enum machine_mode vec_mode;
8795
  HOST_WIDE_INT hi, lo;
8796
  int shift = 63;
8797
  rtvec v;
8798
  rtx mask;
8799
 
8800
  /* Find the sign bit, sign extended to 2*HWI.  */
8801
  if (mode == SFmode)
8802
    lo = 0x80000000, hi = lo < 0;
8803
  else if (HOST_BITS_PER_WIDE_INT >= 64)
8804
    lo = (HOST_WIDE_INT)1 << shift, hi = -1;
8805
  else
8806
    lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
8807
 
8808
  if (invert)
8809
    lo = ~lo, hi = ~hi;
8810
 
8811
  /* Force this value into the low part of a fp vector constant.  */
8812
  mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
8813
  mask = gen_lowpart (mode, mask);
8814
 
8815
  if (mode == SFmode)
8816
    {
8817
      if (vect)
8818
        v = gen_rtvec (4, mask, mask, mask, mask);
8819
      else
8820
        v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
8821
                       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
8822
      vec_mode = V4SFmode;
8823
    }
8824
  else
8825
    {
8826
      if (vect)
8827
        v = gen_rtvec (2, mask, mask);
8828
      else
8829
        v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
8830
      vec_mode = V2DFmode;
8831
    }
8832
 
8833
  return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
8834
}
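
/* Example: for DFmode with VECT and INVERT false, the returned V2DFmode
   register holds { -0.0, 0.0 } -- only bit 63 of the low element set.
   With INVERT true the low element instead has every bit except the sign
   bit set, a mask that clears the sign under AND.  */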

/* Generate code for floating point ABS or NEG.  */

void
ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
                                rtx operands[])
{
  rtx mask, set, use, clob, dst, src;
  bool matching_memory;
  bool use_sse = false;
  bool vector_mode = VECTOR_MODE_P (mode);
  enum machine_mode elt_mode = mode;

  if (vector_mode)
    {
      elt_mode = GET_MODE_INNER (mode);
      use_sse = true;
    }
  else if (TARGET_SSE_MATH)
    use_sse = SSE_FLOAT_MODE_P (mode);

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now.  */
  if (use_sse)
    mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
  else
    {
      /* When not using SSE, we don't use the mask, but prefer to keep the
         same general form of the insn pattern to reduce duplication when
         it comes time to split.  */
      mask = const0_rtx;
    }

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we don't have matching source
     operands, do things in registers.  */
  matching_memory = false;
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src))
        matching_memory = true;
      else
        dst = gen_reg_rtx (mode);
    }
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  if (vector_mode)
    {
      set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
      set = gen_rtx_SET (VOIDmode, dst, set);
      emit_insn (set);
    }
  else
    {
      set = gen_rtx_fmt_e (code, mode, src);
      set = gen_rtx_SET (VOIDmode, dst, set);
      use = gen_rtx_USE (VOIDmode, mask);
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
    }

  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
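
/* The SSE case reduces to pure bit operations: NEG is SRC ^ sign-mask and
   ABS is SRC & ~sign-mask, which is why the mask built above is inverted
   exactly when CODE == ABS.  */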

/* Expand a copysign operation.  Special case operand 0 being a constant.  */

void
ix86_expand_copysign (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];

  mode = GET_MODE (dest);
  vmode = mode == SFmode ? V4SFmode : V2DFmode;

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtvec v;

      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
        op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (op0 == CONST0_RTX (mode))
        op0 = CONST0_RTX (vmode);
      else
        {
          if (mode == SFmode)
            v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
                           CONST0_RTX (SFmode), CONST0_RTX (SFmode));
          else
            v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
          op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
        }

      mask = ix86_build_signbit_mask (mode, 0, 0);

      if (mode == SFmode)
        emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
      else
        emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
    }
  else
    {
      nmask = ix86_build_signbit_mask (mode, 0, 1);
      mask = ix86_build_signbit_mask (mode, 0, 0);

      if (mode == SFmode)
        emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
      else
        emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
    }
}
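
/* Both branches eventually compute copysign (op0, op1) as
   (op1 & sign-mask) | (op0 & ~sign-mask); the constant form can fold
   fabs (op0) at compile time, so only the final AND/IOR pair survives
   the split below.  */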

/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

void
ix86_split_copysign_const (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, x;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  dest = simplify_gen_subreg (vmode, dest, mode, 0);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}

/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  */

void
ix86_split_copysign_var (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
         we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);
      return;
    }

  if (REG_P (mask) && REGNO (dest) == REGNO (mask))     /* alternative 0 */
    {
      gcc_assert (REGNO (op1) == REGNO (scratch));

      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      dest = mask;
      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      if (REGNO (op1) == REGNO (scratch))               /* alternative 1,3 */
        {
          x = gen_rtx_AND (vmode, scratch, mask);
        }
      else                                              /* alternative 2,4 */
        {
          gcc_assert (REGNO (mask) == REGNO (scratch));
          op1 = simplify_gen_subreg (vmode, op1, mode, 0);
          x = gen_rtx_AND (vmode, scratch, op1);
        }
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      if (REGNO (op0) == REGNO (dest))                  /* alternative 1,2 */
        {
          dest = simplify_gen_subreg (vmode, op0, mode, 0);
          x = gen_rtx_AND (vmode, dest, nmask);
        }
      else                                              /* alternative 3,4 */
        {
          gcc_assert (REGNO (nmask) == REGNO (dest));
          dest = nmask;
          op0 = simplify_gen_subreg (vmode, op0, mode, 0);
          x = gen_rtx_AND (vmode, dest, op0);
        }
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }

  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}
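
/* The "alternative N" comments track the constraint alternatives of the
   copysign<mode>3_var pattern: each arm ANDs into whichever of DEST and
   SCRATCH the constraints have already tied to an input, so the split
   never needs an extra register-to-register move.  */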

/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and whether the
   CC mode is at least as constrained as REQ_MODE.  */

int
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
          && (req_mode != CCmode
              || XEXP (SET_SRC (set), 1) != const0_rtx))
        return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
        return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
        return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
        return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      gcc_unreachable ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}

/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}

/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms of trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}

enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    return ix86_fp_compare_mode (code);
  switch (code)
    {
      /* Only the zero flag is needed.  */
    case EQ:                    /* ZF=0 */
    case NE:                    /* ZF!=0 */
      return CCZmode;
      /* Codes needing the carry flag.  */
    case GEU:                   /* CF=0 */
    case GTU:                   /* CF=0 & ZF=0 */
    case LTU:                   /* CF=1 */
    case LEU:                   /* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with the sign flag when
         comparing against zero.  */
    case GE:                    /* SF=OF   or   SF=0 */
    case LT:                    /* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
        return CCGOCmode;
      else
        /* For the other cases the carry flag is not required.  */
        return CCGCmode;
      /* Codes doable only with the sign flag when comparing against
         zero, but for which we lack a jump instruction, so we use
         relational tests against the overflow flag, which thus needs
         to be zero.  */
    case GT:                    /* ZF=0 & SF=OF */
    case LE:                    /* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
        return CCNOmode;
      else
        return CCGCmode;
      /* The strcmp pattern does a (use flags), and combine may ask us
         for the proper mode.  */
    case USE:
      return CCmode;
    default:
      gcc_unreachable ();
    }
}
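
/* Example: (gt (reg:SI x) (const_int 0)) selects CCNOmode, so the
   comparison may be implemented by instructions like "test" that leave
   the carry flag undefined; (gtu ...) genuinely needs the carry and
   therefore always gets full CCmode.  */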

/* Return the fixed registers used for condition codes.  */

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCZmode:
      switch (m2)
        {
        default:
          return VOIDmode;

        case CCmode:
        case CCGCmode:
        case CCGOCmode:
        case CCNOmode:
        case CCZmode:
          return CCmode;
        }

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
         checked above.  */
      return VOIDmode;
    }
}
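
/* Example: a user in CCGCmode combined with one in CCGOCmode yields
   CCGCmode, the stricter of the two; mixing any of the integer CC modes
   with CCFPmode or CCFPUmode yields VOIDmode, forcing the comparisons to
   stay separate.  */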

/* Return true if we should use an FCOMI instruction for this fp comparison.  */

int
ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
{
  enum rtx_code swapped_code = swap_condition (code);
  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
          || (ix86_fp_comparison_cost (swapped_code)
              == ix86_fp_comparison_fcomi_cost (swapped_code)));
}

/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the fcomi compare instructions.  The XFmode
     compare instructions require registers except when comparing
     against zero or when converting operand 1 from fixed point to
     floating point.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
          || (op_mode == XFmode
              && ! (standard_80387_constant_p (op0) == 1
                    || standard_80387_constant_p (op1) == 1)
              && GET_CODE (op1) != FLOAT)
          || ix86_use_fcomi_compare (code)))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
         things around if they appear profitable, otherwise force op0
         into a register.  */

      if (standard_80387_constant_p (op0) == 0
          || (GET_CODE (op0) == MEM
              && ! (standard_80387_constant_p (op1) == 0
                    || GET_CODE (op1) == MEM)))
        {
          rtx tmp;
          tmp = op0, op0 = op1, op1 = tmp;
          code = swap_condition (code);
        }

      if (GET_CODE (op0) != REG)
        op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
        {
          int tmp = standard_80387_constant_p (op1);
          if (tmp == 0)
            op1 = validize_mem (force_const_mem (op_mode, op1));
          else if (tmp == 1)
            {
              if (TARGET_CMOVE)
                op1 = force_reg (op_mode, op1);
            }
          else
            op1 = force_reg (op_mode, op1);
        }
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op1) == REG || !no_new_pseudos))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
        op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}

/* Convert the comparison codes we use to represent an FP comparison to the
   integer code that will result in a proper branch.  Return UNKNOWN if no
   such code is available.  */

enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
    case UNEQ:
      return EQ;
    case UNLT:
      return LTU;
    case UNLE:
      return LEU;
    case LTGT:
      return NE;
    default:
      return UNKNOWN;
    }
}
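
/* Example: after an fcomi or fnstsw/sahf sequence the FP condition GT is
   tested as GTU, because C0/C2/C3 land in CF/PF/ZF and thus behave like
   the flags of an unsigned integer compare (see the flag table in
   ix86_fp_comparison_codes below).  */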

/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is the comparison code for a branch that
   branches around FIRST_CODE and SECOND_CODE.  If one of the branches
   is not required, its value is set to UNKNOWN.
   We never require more than two branches.  */

void
ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
                          enum rtx_code *first_code,
                          enum rtx_code *second_code)
{
  *first_code = code;
  *bypass_code = UNKNOWN;
  *second_code = UNKNOWN;

  /* The fcomi comparison sets flags as follows:

     cmp    ZF PF CF
     >      0  0  0
     <      0  0  1
     =      1  0  0
     un     1  1  1 */

  switch (code)
    {
    case GT:                    /* GTU - CF=0 & ZF=0 */
    case GE:                    /* GEU - CF=0 */
    case ORDERED:               /* PF=0 */
    case UNORDERED:             /* PF=1 */
    case UNEQ:                  /* EQ - ZF=1 */
    case UNLT:                  /* LTU - CF=1 */
    case UNLE:                  /* LEU - CF=1 | ZF=1 */
    case LTGT:                  /* EQ - ZF=0 */
      break;
    case LT:                    /* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:                    /* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:                    /* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;
    case NE:                    /* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:                  /* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:                  /* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      gcc_unreachable ();
    }
  if (!TARGET_IEEE_FP)
    {
      *second_code = UNKNOWN;
      *bypass_code = UNKNOWN;
    }
}
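
/* Example: with TARGET_IEEE_FP, LT becomes FIRST_CODE = UNLT guarded by
   BYPASS_CODE = UNORDERED (branch around the test when an operand is a
   NaN, where CF would be set spuriously), while NE needs FIRST_CODE =
   LTGT plus SECOND_CODE = UNORDERED, since NE must also be taken for
   unordered operands.  */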

/* Return the cost of a comparison done with fcom + arithmetic operations
   on AX.  All following functions use the number of instructions as the
   cost metric.  In the future this should be tweaked to compute bytes for
   optimize_size and to take into account the performance of various
   instructions on various CPUs.  */
static int
ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
{
  if (!TARGET_IEEE_FP)
    return 4;
  /* The cost of code output by ix86_expand_fp_compare.  */
  switch (code)
    {
    case UNLE:
    case UNLT:
    case LTGT:
    case GT:
    case GE:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
      return 4;
    case LT:
    case NE:
    case EQ:
    case UNGE:
      return 5;
    case LE:
    case UNGT:
      return 6;
    default:
      gcc_unreachable ();
    }
}

/* Return the cost of a comparison done using the fcomi operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_fcomi_cost (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return an arbitrarily high cost when the instruction is not
     supported; this prevents gcc from using it.  */
  if (!TARGET_CMOVE)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
}

/* Return the cost of a comparison done using the sahf operation.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_sahf_cost (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  /* Return an arbitrarily high cost when the instruction is not
     preferred; this prevents gcc from using it.  */
  if (!TARGET_USE_SAHF && !optimize_size)
    return 1024;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
}

/* Compute the minimum cost of the comparison over all available methods.
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
static int
ix86_fp_comparison_cost (enum rtx_code code)
{
  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
  int min;

  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
  sahf_cost = ix86_fp_comparison_sahf_cost (code);

  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
  if (min > sahf_cost)
    min = sahf_cost;
  if (min > fcomi_cost)
    min = fcomi_cost;
  return min;
}

/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
                        rtx *second_test, rtx *bypass_test)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do fcomi/sahf based test when profitable.  */
  if ((bypass_code == UNKNOWN || bypass_test)
      && (second_code == UNKNOWN || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      if (TARGET_CMOVE)
        {
          tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
          tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
                             tmp);
          emit_insn (tmp);
        }
      else
        {
          tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
          tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
          if (!scratch)
            scratch = gen_reg_rtx (HImode);
          emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
          emit_insn (gen_x86_sahf_1 (scratch));
        }

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != UNKNOWN)
        *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
                                       gen_rtx_REG (intcmp_mode, FLAGS_REG),
                                       const0_rtx);
      if (second_code != UNKNOWN)
        *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
                                       gen_rtx_REG (intcmp_mode, FLAGS_REG),
                                       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
        scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
         doesn't happen to work out to anything nice combination-wise.
         So do some bit twiddling on the value we've got in AH to come
         up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
      switch (code)
        {
        case GT:
        case UNGT:
          if (code == GT || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
              intcmp_mode = CCmode;
              code = GEU;
            }
          break;
        case LT:
        case UNLT:
          if (code == LT && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
              code = NE;
            }
          break;
        case GE:
        case UNGE:
          if (code == GE || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
                                             GEN_INT (0x01)));
              code = NE;
            }
          break;
        case LE:
        case UNLE:
          if (code == LE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = LTU;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = NE;
            }
          break;
        case EQ:
        case UNEQ:
          if (code == EQ && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = NE;
              break;
            }
          break;
        case NE:
        case LTGT:
          if (code == NE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
                                             GEN_INT (0x40)));
              code = NE;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = EQ;
            }
          break;

        case UNORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = NE;
          break;
        case ORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = EQ;
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
                         gen_rtx_REG (intcmp_mode, FLAGS_REG),
                         const0_rtx);
}
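
/* The magic constants above index the FPU status word image that fnstsw
   leaves in AH: 0x01 is C0, 0x04 is C2 and 0x40 is C3, so 0x45 tests all
   three at once.  For GT, e.g., "test $0x45, %ah" followed by sete
   accepts exactly the ordered, strictly-greater case where C0, C2 and C3
   are all clear.  */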

rtx
ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
{
  rtx op0, op1, ret;
  op0 = ix86_compare_op0;
  op1 = ix86_compare_op1;

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  if (ix86_compare_emitted)
    {
      ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
      ix86_compare_emitted = NULL_RTX;
    }
  else if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
                                  second_test, bypass_test);
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}

/* Return true if CODE will result in a nontrivial jump sequence.  */
bool
ix86_fp_jump_nontrivial_p (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  if (!TARGET_CMOVE)
    return true;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return bypass_code != UNKNOWN || second_code != UNKNOWN;
}

void
ix86_expand_branch (enum rtx_code code, rtx label)
{
  rtx tmp;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
      simple:
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
      {
        rtvec vec;
        int use_fcomi;
        enum rtx_code bypass_code, first_code, second_code;

        code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
                                             &ix86_compare_op1);

        ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

        /* Check whether we will use the natural sequence with one jump.
           If so, we can expand the jump early.  Otherwise delay expansion
           by creating a compound insn so as not to confuse the
           optimizers.  */
        if (bypass_code == UNKNOWN && second_code == UNKNOWN
            && TARGET_CMOVE)
          {
            ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx, NULL_RTX, NULL_RTX);
          }
        else
          {
            tmp = gen_rtx_fmt_ee (code, VOIDmode,
                                  ix86_compare_op0, ix86_compare_op1);
            tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                        gen_rtx_LABEL_REF (VOIDmode, label),
                                        pc_rtx);
            tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

            use_fcomi = ix86_use_fcomi_compare (code);
            vec = rtvec_alloc (3 + !use_fcomi);
            RTVEC_ELT (vec, 0) = tmp;
            RTVEC_ELT (vec, 1)
              = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
            RTVEC_ELT (vec, 2)
              = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
            if (! use_fcomi)
              RTVEC_ELT (vec, 3)
                = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

            emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
          }
        return;
      }

    case DImode:
      if (TARGET_64BIT)
        goto simple;
      /* FALLTHRU */
    case TImode:
      /* Expand DImode branch into multiple compare+branch.  */
      {
        rtx lo[2], hi[2], label2;
        enum rtx_code code1, code2, code3;
        enum machine_mode submode;

        if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
          {
            tmp = ix86_compare_op0;
            ix86_compare_op0 = ix86_compare_op1;
            ix86_compare_op1 = tmp;
            code = swap_condition (code);
          }
        if (GET_MODE (ix86_compare_op0) == DImode)
          {
            split_di (&ix86_compare_op0, 1, lo+0, hi+0);
            split_di (&ix86_compare_op1, 1, lo+1, hi+1);
            submode = SImode;
          }
        else
          {
            split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
            split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
            submode = DImode;
          }

        /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
           avoid two branches.  This costs one extra insn, so disable when
           optimizing for size.  */

        if ((code == EQ || code == NE)
            && (!optimize_size
                || hi[1] == const0_rtx || lo[1] == const0_rtx))
          {
            rtx xor0, xor1;

            xor1 = hi[0];
            if (hi[1] != const0_rtx)
              xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            xor0 = lo[0];
            if (lo[1] != const0_rtx)
              xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            tmp = expand_binop (submode, ior_optab, xor1, xor0,
                                NULL_RTX, 0, OPTAB_WIDEN);

            ix86_compare_op0 = tmp;
            ix86_compare_op1 = const0_rtx;
            ix86_expand_branch (code, label);
            return;
          }

        /* Otherwise, if we are doing less-than or greater-or-equal-than,
           op1 is a constant, and the low word is zero, then we can just
           examine the high word.  */

        if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
          switch (code)
            {
            case LT: case LTU: case GE: case GEU:
              ix86_compare_op0 = hi[0];
              ix86_compare_op1 = hi[1];
              ix86_expand_branch (code, label);
              return;
            default:
              break;
            }

        /* Otherwise, we need two or three jumps.  */

        label2 = gen_label_rtx ();

        code1 = code;
        code2 = swap_condition (code);
        code3 = unsigned_condition (code);

        switch (code)
          {
          case LT: case GT: case LTU: case GTU:
            break;

          case LE:   code1 = LT;  code2 = GT;  break;
          case GE:   code1 = GT;  code2 = LT;  break;
          case LEU:  code1 = LTU; code2 = GTU; break;
          case GEU:  code1 = GTU; code2 = LTU; break;

          case EQ:   code1 = UNKNOWN; code2 = NE;  break;
          case NE:   code2 = UNKNOWN; break;

          default:
            gcc_unreachable ();
          }

        /*
         * a < b =>
         *    if (hi(a) < hi(b)) goto true;
         *    if (hi(a) > hi(b)) goto false;
         *    if (lo(a) < lo(b)) goto true;
         *  false:
         */

        ix86_compare_op0 = hi[0];
        ix86_compare_op1 = hi[1];

        if (code1 != UNKNOWN)
          ix86_expand_branch (code1, label);
        if (code2 != UNKNOWN)
          ix86_expand_branch (code2, label2);

        ix86_compare_op0 = lo[0];
        ix86_compare_op1 = lo[1];
        ix86_expand_branch (code3, label);

        if (code2 != UNKNOWN)
          emit_label (label2);
        return;
      }

    default:
      gcc_unreachable ();
    }
}
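
/* Example: a 32-bit DImode "a <= b" takes the two-or-three jump path with
   code1 = LT, code2 = GT and code3 = LEU: jump to LABEL if
   hi(a) < hi(b), to LABEL2 (fall through) if hi(a) > hi(b), and otherwise
   decide on the unsigned low-word compare lo(a) <= lo(b).  */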

/* Split a branch based on a floating point condition.  */
void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
                      rtx target1, rtx target2, rtx tmp, rtx pushed)
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  rtx condition;
  int bypass_probability = -1, second_probability = -1, probability = -1;
  rtx i;

  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
                                      tmp, &second, &bypass);

  /* Remove the pushed operand from the stack.  */
  if (pushed)
    ix86_free_from_memory (GET_MODE (pushed));

  if (split_branch_probability >= 0)
    {
      /* Distribute the probabilities across the jumps.
         Assume that BYPASS and SECOND always test
         for UNORDERED.  */
      probability = split_branch_probability;

      /* A value of 1 is low enough that the probability does not need
         to be updated.  Later we may run some experiments and see
         if unordered values are more frequent in practice.  */
      if (bypass)
        bypass_probability = 1;
      if (second)
        second_probability = 1;
    }
  if (bypass != NULL_RTX)
    {
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
                          (VOIDmode, pc_rtx,
                           gen_rtx_IF_THEN_ELSE (VOIDmode,
                                                 bypass,
                                                 gen_rtx_LABEL_REF (VOIDmode,
                                                                    label),
                                                 pc_rtx)));
      if (bypass_probability >= 0)
        REG_NOTES (i)
          = gen_rtx_EXPR_LIST (REG_BR_PROB,
                               GEN_INT (bypass_probability),
                               REG_NOTES (i));
    }
  i = emit_jump_insn (gen_rtx_SET
                      (VOIDmode, pc_rtx,
                       gen_rtx_IF_THEN_ELSE (VOIDmode,
                                             condition, target1, target2)));
  if (probability >= 0)
    REG_NOTES (i)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
                           GEN_INT (probability),
                           REG_NOTES (i));
  if (second != NULL_RTX)
    {
      i = emit_jump_insn (gen_rtx_SET
                          (VOIDmode, pc_rtx,
                           gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
                                                 target2)));
      if (second_probability >= 0)
        REG_NOTES (i)
          = gen_rtx_EXPR_LIST (REG_BR_PROB,
                               GEN_INT (second_probability),
                               REG_NOTES (i));
    }
  if (label != NULL_RTX)
    emit_label (label);
}

int
ix86_expand_setcc (enum rtx_code code, rtx dest)
{
  rtx ret, tmp, tmpreg, equiv;
  rtx second_test, bypass_test;

  if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
    return 0; /* FAIL */

  gcc_assert (GET_MODE (dest) == QImode);

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  tmp = dest;
  tmpreg = dest;

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  if (bypass_test || second_test)
    {
      rtx test = second_test;
      int bypass = 0;
      rtx tmp2 = gen_reg_rtx (QImode);
      if (bypass_test)
        {
          gcc_assert (!second_test);
          test = bypass_test;
          bypass = 1;
          PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
        }
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

      if (bypass)
        emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
      else
        emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  /* Attach a REG_EQUAL note describing the comparison result.  */
  if (ix86_compare_op0 && ix86_compare_op1)
    {
      equiv = simplify_gen_relational (code, QImode,
                                       GET_MODE (ix86_compare_op0),
                                       ix86_compare_op0, ix86_compare_op1);
      set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
    }

  return 1; /* DONE */
}
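
/* When a second test exists, the two QImode setcc results are IOR-ed, as
   either condition suffices; a bypass test is instead reversed and AND-ed
   in, forcing the result to zero whenever the unordered bypass branch
   would have been taken.  */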

/* Expand a comparison setting or clearing the carry flag.  Return true
   when successful, and set *POP to the resulting comparison.  */
static bool
ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
{
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle DImode compares that go through the special path.  Also
     we can't deal with FP compares yet.  This is possible to add.  */
  if (mode == (TARGET_64BIT ? TImode : DImode))
    return false;
  if (FLOAT_MODE_P (mode))
    {
      rtx second_test = NULL, bypass_test = NULL;
      rtx compare_op, compare_seq;

      /* Shortcut: the following common codes never translate into carry
         flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
          || code == ORDERED || code == UNORDERED)
        return false;

      /* These comparisons require the zero flag; swap the operands so
         they won't.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
          && !TARGET_IEEE_FP)
        {
          rtx tmp = op0;
          op0 = op1;
          op1 = tmp;
          code = swap_condition (code);
        }

      /* Try to expand the comparison and verify that we end up with a
         carry flag based comparison.  This fails to be true only when we
         decide to expand the comparison using arithmetic, which is not a
         common scenario.  */
      start_sequence ();
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
                                           &second_test, &bypass_test);
      compare_seq = get_insns ();
      end_sequence ();

      if (second_test || bypass_test)
        return false;
      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
          || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
        code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
      else
        code = GET_CODE (compare_op);
      if (code != LTU && code != GEU)
        return false;
      emit_insn (compare_seq);
      *pop = compare_op;
      return true;
    }
  if (!INTEGRAL_MODE_P (mode))
    return false;
  switch (code)
    {
    case LTU:
    case GEU:
      break;

    /* Convert a==0 into (unsigned)a<1.  */
    case EQ:
    case NE:
      if (op1 != const0_rtx)
        return false;
      op1 = const1_rtx;
      code = (code == EQ ? LTU : GEU);
      break;

    /* Convert a>b into b<a or a>=b-1.  */
    case GTU:
    case LEU:
      if (GET_CODE (op1) == CONST_INT)
        {
          op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
          /* Bail out on overflow.  We could still swap the operands, but
             that would force loading the constant into a register.  */
          if (op1 == const0_rtx
              || !x86_64_immediate_operand (op1, GET_MODE (op1)))
            return false;
          code = (code == GTU ? GEU : LTU);
        }
      else
        {
          rtx tmp = op1;
          op1 = op0;
          op0 = tmp;
          code = (code == GTU ? LTU : GEU);
        }
      break;

    /* Convert a>=0 into (unsigned)a<0x80000000.  */
    case LT:
    case GE:
      if (mode == DImode || op1 != const0_rtx)
        return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LT ? GEU : LTU);
      break;
    case LE:
    case GT:
      if (mode == DImode || op1 != constm1_rtx)
        return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LE ? GEU : LTU);
      break;

    default:
      return false;
    }
  /* Swapping the operands may cause the constant to appear as the first
     operand.  */
  if (!nonimmediate_operand (op0, VOIDmode))
    {
      if (no_new_pseudos)
        return false;
      op0 = force_reg (mode, op0);
    }
  ix86_compare_op0 = op0;
  ix86_compare_op1 = op1;
  *pop = ix86_expand_compare (code, NULL, NULL);
  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
  return true;
}
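
/* Example: "a == 0" is rewritten as "(unsigned) a < 1", i.e. LTU, which a
   single cmp can answer through the carry flag alone; similarly "a >= 0"
   in SImode becomes "(unsigned) a < 0x80000000".  */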

int
ix86_expand_int_movcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  rtx second_test, bypass_test;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;

  start_sequence ();
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
  compare_seq = get_insns ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
      || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
    sign_bit_compare_p = true;

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if ((mode != HImode || TARGET_FAST_PREFIX)
      && (mode != (TARGET_64BIT ? TImode : DImode))
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      diff = ct - cf;
      /* Sign bit compares are better done using shifts than using
         sbb.  */
      if (sign_bit_compare_p
          || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
                                             ix86_compare_op1, &compare_op))
        {
          /* Detect overlap between the destination and the compare
             sources.  */
          rtx tmp = out;

          if (!sign_bit_compare_p)
            {
              bool fpcmp = false;

              compare_code = GET_CODE (compare_op);

              if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
                  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
                {
                  fpcmp = true;
                  compare_code = ix86_fp_compare_code_to_integer (compare_code);
                }

              /* To simplify the rest of the code, restrict to the GEU
                 case.  */
              if (compare_code == LTU)
                {
                  HOST_WIDE_INT tmp = ct;
                  ct = cf;
                  cf = tmp;
                  compare_code = reverse_condition (compare_code);
                  code = reverse_condition (code);
                }
              else
                {
                  if (fpcmp)
                    PUT_CODE (compare_op,
                              reverse_condition_maybe_unordered
                                (GET_CODE (compare_op)));
                  else
                    PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
                }
              diff = ct - cf;

              if (reg_overlap_mentioned_p (out, ix86_compare_op0)
                  || reg_overlap_mentioned_p (out, ix86_compare_op1))
                tmp = gen_reg_rtx (mode);

              if (mode == DImode)
                emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
              else
                emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
            }
          else
            {
              if (code == GT || code == GE)
                code = reverse_condition (code);
              else
                {
                  HOST_WIDE_INT tmp = ct;
                  ct = cf;
                  cf = tmp;
                  diff = ct - cf;
                }
              tmp = emit_store_flag (tmp, code, ix86_compare_op0,
                                     ix86_compare_op1, VOIDmode, 0, -1);
            }

          if (diff == 1)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * [addl dest, ct]
               *
               * Size 5 - 8.
               */
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           tmp, GEN_INT (ct),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else if (cf == -1)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * orl $ct, dest
               *
               * Size 8.
               */
              tmp = expand_simple_binop (mode, IOR,
                                         tmp, GEN_INT (ct),
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else if (diff == -1 && ct)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * notl dest
               * [addl dest, cf]
               *
               * Size 8 - 11.
               */
              tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
              if (cf)
                tmp = expand_simple_binop (mode, PLUS,
                                           copy_rtx (tmp), GEN_INT (cf),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * [notl dest]
               * andl cf - ct, dest
               * [addl dest, ct]
               *
               * Size 8 - 11.
               */

              if (cf == 0)
                {
                  cf = ct;
                  ct = 0;
                  tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
                }

              tmp = expand_simple_binop (mode, AND,
                                         copy_rtx (tmp),
                                         gen_int_mode (cf - ct, mode),
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           copy_rtx (tmp), GEN_INT (ct),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }

          if (!rtx_equal_p (tmp, out))
            emit_move_insn (copy_rtx (out), copy_rtx (tmp));

          return 1; /* DONE */
        }
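
      /* All of the branches above share one shape: cmp/sbb leaves 0 or -1
         in TMP, and the constant pair (CT, CF) is then reached branch-free
         with at most a NOT, an AND or IOR, and an ADD of a constant.  */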

      if (diff < 0)
        {
          HOST_WIDE_INT tmp;
          tmp = ct, ct = cf, cf = tmp;
          diff = -diff;
          if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
            {
              /* We may be reversing an unordered compare to a normal compare;
                 that is not valid in general (we may convert a non-trapping
                 condition to a trapping one), but on i386 we currently emit
                 all comparisons unordered.  */
              compare_code = reverse_condition_maybe_unordered (compare_code);
              code = reverse_condition_maybe_unordered (code);
            }
          else
            {
              compare_code = reverse_condition (compare_code);
              code = reverse_condition (code);
            }
        }

      compare_code = UNKNOWN;
      if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
          && GET_CODE (ix86_compare_op1) == CONST_INT)
        {
          if (ix86_compare_op1 == const0_rtx
              && (code == LT || code == GE))
            compare_code = code;
          else if (ix86_compare_op1 == constm1_rtx)
            {
              if (code == LE)
                compare_code = LT;
              else if (code == GT)
                compare_code = GE;
            }
        }

      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
      if (compare_code != UNKNOWN
          && GET_MODE (ix86_compare_op0) == GET_MODE (out)
          && (cf == -1 || ct == -1))
        {
          /* If lea code below could be used, only optimize
             if it results in a 2 insn sequence.  */

          if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
                 || diff == 3 || diff == 5 || diff == 9)
              || (compare_code == LT && ct == -1)
              || (compare_code == GE && cf == -1))
            {
              /*
               * notl op1       (if necessary)
               * sarl $31, op1
               * orl cf, op1
               */
              if (ct != -1)
                {
                  cf = ct;
                  ct = -1;
                  code = reverse_condition (code);
                }

              out = emit_store_flag (out, code, ix86_compare_op0,
                                     ix86_compare_op1, VOIDmode, 0, -1);

              out = expand_simple_binop (mode, IOR,
                                         out, GEN_INT (cf),
                                         out, 1, OPTAB_DIRECT);
              if (out != operands[0])
                emit_move_insn (operands[0], out);

              return 1; /* DONE */
            }
        }


      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
           || diff == 3 || diff == 5 || diff == 9)
          && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
          && (mode != DImode
              || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
        {
          /*
           * xorl dest,dest
           * cmpl op1,op2
           * setcc dest
           * lea cf(dest*(ct-cf)),dest
           *
           * Size 14.
           *
           * This also catches the degenerate setcc-only case.
           */

          rtx tmp;
          int nops;

          out = emit_store_flag (out, code, ix86_compare_op0,
                                 ix86_compare_op1, VOIDmode, 0, 1);

          nops = 0;
          /* On x86_64 the lea instruction operates on Pmode, so we need
             to get the arithmetic done in the proper mode to match.  */
          if (diff == 1)
            tmp = copy_rtx (out);
          else
            {
              rtx out1;
              out1 = copy_rtx (out);
              tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
              nops++;
              if (diff & 1)
                {
                  tmp = gen_rtx_PLUS (mode, tmp, out1);
                  nops++;
                }
            }
          if (cf != 0)
            {
              tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
              nops++;
            }
          if (!rtx_equal_p (tmp, out))
            {
              if (nops == 1)
                out = force_operand (tmp, copy_rtx (out));
              else
                emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
            }
          if (!rtx_equal_p (out, operands[0]))
            emit_move_insn (operands[0], copy_rtx (out));

          return 1; /* DONE */
        }

      /*
       * General case:                  Jumpful:
       *   xorl dest,dest               cmpl op1, op2
       *   cmpl op1, op2                movl ct, dest
       *   setcc dest                   jcc 1f
       *   decl dest                    movl cf, dest
       *   andl (cf-ct),dest            1:
       *   addl ct,dest
       *
       * Size 20.                       Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern CPUs, so consider failing only if optimizing
       * for space.
       */

      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
          && BRANCH_COST >= 2)
        {
          if (cf == 0)
            {
              cf = ct;
              ct = 0;
              if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
                /* We may be reversing an unordered compare to a normal
                   compare; that is not valid in general (we may convert a
                   non-trapping condition to a trapping one), but on i386 we
                   currently emit all comparisons unordered.  */
                code = reverse_condition_maybe_unordered (code);
              else
                {
                  code = reverse_condition (code);
                  if (compare_code != UNKNOWN)
                    compare_code = reverse_condition (compare_code);
                }
            }

          if (compare_code != UNKNOWN)
            {
              /* notl op1       (if needed)
                 sarl $31, op1
                 andl (cf-ct), op1
                 addl ct, op1

                 For x < 0 (resp. x <= -1) there will be no notl,
                 so if possible swap the constants to get rid of the
                 complement.
                 True/false will be -1/0 while the code below (store flag
                 followed by decrement) is 0/-1, so the constants need
                 to be exchanged once more.  */

              if (compare_code == GE || !cf)
                {
                  code = reverse_condition (code);
                  compare_code = LT;
                }
              else
                {
                  HOST_WIDE_INT tmp = cf;
                  cf = ct;
                  ct = tmp;
                }

              out = emit_store_flag (out, code, ix86_compare_op0,
                                     ix86_compare_op1, VOIDmode, 0, -1);
            }
          else
            {
              out = emit_store_flag (out, code, ix86_compare_op0,
                                     ix86_compare_op1, VOIDmode, 0, 1);

              out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
                                         copy_rtx (out), 1, OPTAB_DIRECT);
            }

          out = expand_simple_binop (mode, AND, copy_rtx (out),
                                     gen_int_mode (cf - ct, mode),
                                     copy_rtx (out), 1, OPTAB_DIRECT);
          if (ct)
            out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
                                       copy_rtx (out), 1, OPTAB_DIRECT);
          if (!rtx_equal_p (out, operands[0]))
            emit_move_insn (operands[0], copy_rtx (out));

          return 1; /* DONE */
        }
    }

  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (BRANCH_COST <= 2)
        return 0; /* FAIL */

      /* If one of the two operands is an interesting constant, load a
         constant with the above and mask it in with a logical operation.  */

      if (GET_CODE (operands[2]) == CONST_INT)
        {
          var = operands[3];
          if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
            operands[3] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
            operands[3] = const0_rtx, op = ior_optab;
          else
            return 0; /* FAIL */
        }
      else if (GET_CODE (operands[3]) == CONST_INT)
        {
          var = operands[2];
          if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
            operands[2] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
            operands[2] = const0_rtx, op = ior_optab;
          else
            return 0; /* FAIL */
        }
      else
        return 0; /* FAIL */

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
        return 0; /* FAIL */

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
                          OPTAB_WIDEN);
      if (!rtx_equal_p (out, orig_out))
        emit_move_insn (copy_rtx (orig_out), copy_rtx (out));

      return 1; /* DONE */
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  if (! register_operand (operands[2], VOIDmode)
      && (mode == QImode
          || ! register_operand (operands[3], VOIDmode)))
    operands[2] = force_reg (mode, operands[2]);

  if (mode == QImode
      && ! register_operand (operands[3], VOIDmode))
    operands[3] = force_reg (mode, operands[3]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode,
                                                compare_op, operands[2],
                                                operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
                            gen_rtx_IF_THEN_ELSE (mode,
                                  bypass_test,
                                  copy_rtx (operands[3]),
                                  copy_rtx (operands[0]))));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
                            gen_rtx_IF_THEN_ELSE (mode,
                                  second_test,
                                  copy_rtx (operands[2]),
                                  copy_rtx (operands[0]))));

  return 1; /* DONE */
}
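
/* Illustration only (not part of GCC): a stand-alone C sketch of the
   branchless select idiom that the cmpl/sbbl sequences above implement.
   Function name and types are illustrative assumptions.  */
#if 0
static unsigned int
branchless_select (unsigned int a, unsigned int b,
                   unsigned int ct, unsigned int cf)
{
  /* mask is all ones when a < b (what "cmpl; sbbl dest,dest" leaves in
     dest) and all zeros otherwise.  */
  unsigned int mask = -(unsigned int) (a < b);

  /* The andl/addl tail: yields ct when the mask is set, else cf.  */
  return (mask & (ct - cf)) + cf;
}
#endif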

/* Swap, force into registers, or otherwise massage the two operands
   to an sse comparison with a mask result.  Thus we differ a bit from
   ix86_prepare_fp_compare_args which expects to produce a flags result.

   The DEST operand exists to help determine whether to commute commutative
   operators.  The POP0/POP1 operands are updated in place.  The new
   comparison code is returned, or UNKNOWN if not implementable.  */

static enum rtx_code
ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
                                  rtx *pop0, rtx *pop1)
{
  rtx tmp;

  switch (code)
    {
    case LTGT:
    case UNEQ:
      /* We have no LTGT as an operator.  We could implement it with
         NE & ORDERED, but this requires an extra temporary.  It's
         not clear that it's worth it.  */
      return UNKNOWN;

    case LT:
    case LE:
    case UNGT:
    case UNGE:
      /* These are supported directly.  */
      break;

    case EQ:
    case NE:
    case UNORDERED:
    case ORDERED:
      /* For commutative operators, try to canonicalize the destination
         operand to be first in the comparison - this helps reload to
         avoid extra moves.  */
      if (!dest || !rtx_equal_p (dest, *pop1))
        break;
      /* FALLTHRU */

    case GE:
    case GT:
    case UNLE:
    case UNLT:
      /* These are not supported directly.  Swap the comparison operands
         to transform into something that is supported.  */
      tmp = *pop0;
      *pop0 = *pop1;
      *pop1 = tmp;
      code = swap_condition (code);
      break;

    default:
      gcc_unreachable ();
    }

  return code;
}
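
/* Illustration only (not part of GCC): the SSE compare predicates cover
   EQ/LT/LE/UNORD and their negations but provide no GT/GE, which is why
   the function above swaps operands for GE/GT/UNLE/UNLT.  A scalar sketch
   of the identity used, with illustrative names:  */
#if 0
static int
gt_via_swapped_lt (double a, double b)
{
  /* a > b has the same truth value as b < a, even when either input is a
     NaN (both are then false), so a cmplt with swapped operands suffices.  */
  return b < a;
}
#endif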

/* Detect conditional moves that exactly match min/max operational
   semantics.  Note that this is IEEE safe, as long as we don't
   interchange the operands.

   Returns FALSE if this conditional move doesn't match a MIN/MAX,
   and TRUE if the operation is successful and instructions are emitted.  */

static bool
ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
                           rtx cmp_op1, rtx if_true, rtx if_false)
{
  enum machine_mode mode;
  bool is_min;
  rtx tmp;

  if (code == LT)
    ;
  else if (code == UNGE)
    {
      tmp = if_true;
      if_true = if_false;
      if_false = tmp;
    }
  else
    return false;

  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
    is_min = true;
  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
    is_min = false;
  else
    return false;

  mode = GET_MODE (dest);

  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
     but MODE may be a vector mode and thus not appropriate.  */
  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
    {
      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
      rtvec v;

      if_true = force_reg (mode, if_true);
      v = gen_rtvec (2, if_true, if_false);
      tmp = gen_rtx_UNSPEC (mode, v, u);
    }
  else
    {
      code = is_min ? SMIN : SMAX;
      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
    }

  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
  return true;
}
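
/* Illustration only (not part of GCC): minss/minps return the second
   source operand whenever the compare is unordered, so "x < y ? x : y"
   maps onto the instruction only with the operands in exactly that order.
   A scalar sketch of the hardware semantics:  */
#if 0
static float
minss_semantics (float x, float y)
{
  /* When x < y is false -- including the NaN case -- the second operand
     is the result, which is why the function above never swaps them.  */
  return x < y ? x : y;
}
#endif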

/* Expand an sse vector comparison.  Return the register with the result.  */

static rtx
ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
                     rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  rtx x;

  cmp_op0 = force_reg (mode, cmp_op0);
  if (!nonimmediate_operand (cmp_op1, mode))
    cmp_op1 = force_reg (mode, cmp_op1);

  if (optimize
      || reg_overlap_mentioned_p (dest, op_true)
      || reg_overlap_mentioned_p (dest, op_false))
    dest = gen_reg_rtx (mode);

  x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  return dest;
}

/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
   operations.  This is used for both scalar and vector conditional moves.  */

static void
ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  rtx t2, t3, x;

  if (op_false == CONST0_RTX (mode))
    {
      op_true = force_reg (mode, op_true);
      x = gen_rtx_AND (mode, cmp, op_true);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (op_true == CONST0_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      op_true = force_reg (mode, op_true);
      op_false = force_reg (mode, op_false);

      t2 = gen_reg_rtx (mode);
      if (optimize)
        t3 = gen_reg_rtx (mode);
      else
        t3 = dest;

      x = gen_rtx_AND (mode, op_true, cmp);
      emit_insn (gen_rtx_SET (VOIDmode, t2, x));

      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, t3, x));

      x = gen_rtx_IOR (mode, t3, t2);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}
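
/* Illustration only (not part of GCC): with CMP an all-ones/all-zeros mask
   per element, the three-operand case above is the classic and/andn/or
   blend.  A scalar sketch with illustrative names:  */
#if 0
static unsigned int
mask_blend (unsigned int cmp, unsigned int op_true, unsigned int op_false)
{
  /* (cmp & true) | (~cmp & false); andnps computes the ~cmp & x half.  */
  return (cmp & op_true) | (~cmp & op_false);
}
#endif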

/* Expand a floating-point conditional move.  Return true if successful.  */

int
ix86_expand_fp_movcc (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp, compare_op, second_test, bypass_test;

  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    {
      enum machine_mode cmode;

      /* Since we've no cmove for sse registers, don't force bad register
         allocation just to gain access to it.  Deny movcc when the
         comparison mode doesn't match the move mode.  */
      cmode = GET_MODE (ix86_compare_op0);
      if (cmode == VOIDmode)
        cmode = GET_MODE (ix86_compare_op1);
      if (cmode != mode)
        return 0;

      code = ix86_prepare_sse_fp_compare_args (operands[0], code,
                                               &ix86_compare_op0,
                                               &ix86_compare_op1);
      if (code == UNKNOWN)
        return 0;

      if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
                                     ix86_compare_op1, operands[2],
                                     operands[3]))
        return 1;

      tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
                                 ix86_compare_op1, operands[2], operands[3]);
      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
      return 1;
    }

  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      gcc_assert (!second_test && !bypass_test);
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode, compare_op,
                                                operands[2], operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                            gen_rtx_IF_THEN_ELSE (mode, bypass_test,
                                                  operands[3], operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                            gen_rtx_IF_THEN_ELSE (mode, second_test,
                                                  operands[2], operands[0])));

  return 1;
}

/* Expand a floating-point vector conditional move; a vcond operation
   rather than a movcc operation.  */

bool
ix86_expand_fp_vcond (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[3]);
  rtx cmp;

  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
                                           &operands[4], &operands[5]);
  if (code == UNKNOWN)
    return false;

  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
                                 operands[5], operands[1], operands[2]))
    return true;

  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
                             operands[1], operands[2]);
  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
  return true;
}

/* Expand a signed integral vector conditional move.  */

bool
ix86_expand_int_vcond (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate = false;
  rtx x, cop0, cop1;

  cop0 = operands[4];
  cop1 = operands[5];

  /* Canonicalize the comparison to EQ, GT, GTU.  */
  switch (code)
    {
    case EQ:
    case GT:
    case GTU:
      break;

    case NE:
    case LE:
    case LEU:
      code = reverse_condition (code);
      negate = true;
      break;

    case GE:
    case GEU:
      code = reverse_condition (code);
      negate = true;
      /* FALLTHRU */

    case LT:
    case LTU:
      code = swap_condition (code);
      x = cop0, cop0 = cop1, cop1 = x;
      break;

    default:
      gcc_unreachable ();
    }

  /* Unsigned parallel compare is not supported by the hardware.  Play some
     tricks to turn this into a signed comparison against 0.  */
  if (code == GTU)
    {
      switch (mode)
        {
        case V4SImode:
          {
            rtx t1, t2, mask;

            /* Perform a parallel modulo subtraction.  */
            t1 = gen_reg_rtx (mode);
            emit_insn (gen_subv4si3 (t1, cop0, cop1));

            /* Extract the original sign bit of op0.  */
            mask = GEN_INT (-0x80000000);
            mask = gen_rtx_CONST_VECTOR (mode,
                        gen_rtvec (4, mask, mask, mask, mask));
            mask = force_reg (mode, mask);
            t2 = gen_reg_rtx (mode);
            emit_insn (gen_andv4si3 (t2, cop0, mask));

            /* XOR it back into the result of the subtraction.  This results
               in the sign bit set iff we saw unsigned underflow.  */
            x = gen_reg_rtx (mode);
            emit_insn (gen_xorv4si3 (x, t1, t2));

            code = GT;
          }
          break;

        case V16QImode:
        case V8HImode:
          /* Perform a parallel unsigned saturating subtraction.  */
          x = gen_reg_rtx (mode);
          emit_insn (gen_rtx_SET (VOIDmode, x,
                                  gen_rtx_US_MINUS (mode, cop0, cop1)));

          code = EQ;
          negate = !negate;
          break;

        default:
          gcc_unreachable ();
        }

      cop0 = x;
      cop1 = CONST0_RTX (mode);
    }

  x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
                           operands[1+negate], operands[2-negate]);

  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
                         operands[2-negate]);
  return true;
}
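
/* Illustration only (not part of GCC): the V16QI/V8HI branch above exploits
   the fact that an unsigned saturating subtract is zero exactly when
   a <= b, so an EQ against zero plus a negate yields GTU.  Scalar sketch:  */
#if 0
static int
gtu_via_saturating_sub (unsigned char a, unsigned char b)
{
  unsigned char d = a > b ? (unsigned char) (a - b) : 0;  /* psubusb */
  return d != 0;                /* nonzero exactly when a > b unsigned */
}
#endif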

/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
int
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode = GET_MODE (operands[0]);

  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return 0;
  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
                                       ix86_compare_op1, &compare_op))
    return 0;
  code = GET_CODE (compare_op);

  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
        PUT_CODE (compare_op,
                  reverse_condition_maybe_unordered
                    (GET_CODE (compare_op)));
      else
        PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }
  PUT_MODE (compare_op, mode);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (GET_MODE (operands[0]))
        {
          case QImode:
            emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
            break;
          case HImode:
            emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
            break;
          case SImode:
            emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
            break;
          case DImode:
            emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
            break;
          default:
            gcc_unreachable ();
        }
    }
  else
    {
      switch (GET_MODE (operands[0]))
        {
          case QImode:
            emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
            break;
          case HImode:
            emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
            break;
          case SImode:
            emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
            break;
          case DImode:
            emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
            break;
          default:
            gcc_unreachable ();
        }
    }
  return 1; /* DONE */
}
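
/* Illustration only (not part of GCC): the adc/sbb patterns above turn a
   conditional increment such as "r = x + (a < b)" into compare-plus-adc,
   with the carry flag standing in for the comparison result.  Sketch:  */
#if 0
static unsigned int
conditional_increment (unsigned int x, unsigned int a, unsigned int b)
{
  /* An unsigned a < b compare sets the carry flag; adding it in is one
     adc instruction, with no branch and no setcc.  */
  return x + (a < b);
}
#endif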


/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and nonoffsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  At most three parts are generated.  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  if (!TARGET_64BIT)
    size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
  gcc_assert (size >= 2 && size <= 3);

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, which force all constants to memory to allow combining.  */
  if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
        operand = tmp;
    }

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsettable memories we handle are pushes.  */
      int ok = push_operand (operand, VOIDmode);

      gcc_assert (ok);

      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = operand;
      return size;
    }

  if (GET_CODE (operand) == CONST_VECTOR)
    {
      enum machine_mode imode = int_mode_for_mode (mode);
      /* Caution: if we looked through a constant pool memory above,
         the operand may actually have a different mode now.  That's
         ok, since we want to pun this all the way back to an integer.  */
      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
      gcc_assert (operand != NULL);
      mode = imode;
    }

  if (!TARGET_64BIT)
    {
      if (mode == DImode)
        split_di (&operand, 1, &parts[0], &parts[1]);
      else
        {
          if (REG_P (operand))
            {
              gcc_assert (reload_completed);
              parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
              parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
              if (size == 3)
                parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, SImode, 0);
              parts[0] = operand;
              parts[1] = adjust_address (operand, SImode, 4);
              if (size == 3)
                parts[2] = adjust_address (operand, SImode, 8);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[4];

              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              switch (mode)
                {
                case XFmode:
                  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
                  parts[2] = gen_int_mode (l[2], SImode);
                  break;
                case DFmode:
                  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
                  break;
                default:
                  gcc_unreachable ();
                }
              parts[1] = gen_int_mode (l[1], SImode);
              parts[0] = gen_int_mode (l[0], SImode);
            }
          else
            gcc_unreachable ();
        }
    }
  else
    {
      if (mode == TImode)
        split_ti (&operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
        {
          enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
          if (REG_P (operand))
            {
              gcc_assert (reload_completed);
              parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
              parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, DImode, 0);
              parts[0] = operand;
              parts[1] = adjust_address (operand, upper_mode, 8);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[4];

              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              real_to_target (l, &r, mode);

              /* Do not use shift by 32 to avoid warning on 32bit systems.  */
              if (HOST_BITS_PER_WIDE_INT >= 64)
                parts[0]
                  = gen_int_mode
                      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
                       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
                       DImode);
              else
                parts[0] = immed_double_const (l[0], l[1], DImode);

              if (upper_mode == SImode)
                parts[1] = gen_int_mode (l[2], SImode);
              else if (HOST_BITS_PER_WIDE_INT >= 64)
                parts[1]
                  = gen_int_mode
                      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
                       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
                       DImode);
              else
                parts[1] = immed_double_const (l[2], l[3], DImode);
            }
          else
            gcc_unreachable ();
        }
    }

  return size;
}
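
/* Illustration only (not part of GCC): the ((x << 31) << 1) dance above
   builds a 64-bit immediate from two 32-bit target words without ever
   shifting by a full 32, which would warn (or be undefined) when the host
   wide type is only 32 bits.  A plain C sketch with illustrative types:  */
#if 0
static long long
combine_words (unsigned long lo, unsigned long hi)
{
  /* Mask lo to 32 bits; shift hi up by 31 + 1 = 32 bits in two steps.  */
  return (long long) (lo & (((long long) 2 << 31) - 1))
         + (((long long) hi << 31) << 1);
}
#endif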

/* Emit insns to perform a move or push of DI, DF, and XF values.
   Operands 2-4 contain the input values in the correct order; operands
   5-7 contain the output values.  All required insns are emitted here.  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][3];
  int nparts;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);

  /* The DFmode expanders may ask us to move a double.
     For a 64-bit target this is a single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
    {
      /* Optimize constant pool reference to immediates.  This is used by
         fp moves, which force all constants to memory to allow combining.  */

      if (GET_CODE (operands[1]) == MEM
          && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
          && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
        operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
        {
          operands[0] = copy_rtx (operands[0]);
          PUT_MODE (operands[0], Pmode);
        }
      else
        operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else
    gcc_assert (GET_CODE (operands[0]) != MEM
                || offsettable_memref_p (operands[0]));

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting a push, watch out for source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (nparts == 3)
        part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
                                     XEXP (part[1][2], 0));
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
                                   XEXP (part[1][1], 0));
    }

  /* We need to do the copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
        collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
        collisions++;
      if (nparts == 3
          && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
        collisions++;

      /* A collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3
          && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
        {
          rtx tmp;
          tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
          tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
        }

      /* If there are more collisions, we can't handle it by reordering.
         Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
        {
          rtx base;

          collisions = 1;

          base = part[0][nparts - 1];

          /* Handle the case when the last part isn't valid for lea.
             This happens in 64-bit mode when storing the 12-byte XFmode.  */
          if (GET_MODE (base) != Pmode)
            base = gen_rtx_REG (Pmode, REGNO (base));

          emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
          part[1][0] = replace_equiv_address (part[1][0], base);
          part[1][1] = replace_equiv_address (part[1][1],
                                      plus_constant (base, UNITS_PER_WORD));
          if (nparts == 3)
            part[1][2] = replace_equiv_address (part[1][2],
                                      plus_constant (base, 8));
        }
    }

  if (push)
    {
      if (!TARGET_64BIT)
        {
          if (nparts == 3)
            {
              if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
                emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
              emit_move_insn (part[0][2], part[1][2]);
            }
        }
      else
        {
          /* In 64-bit mode we don't have a 32-bit push available.  If the
             operand is a register, that is OK - we will just use the larger
             counterpart.  We also retype memory - this comes from an attempt
             to avoid a REX prefix on moving the second half of a TFmode
             value.  */
          if (GET_MODE (part[1][1]) == SImode)
            {
              switch (GET_CODE (part[1][1]))
                {
                case MEM:
                  part[1][1] = adjust_address (part[1][1], DImode, 0);
                  break;

                case REG:
                  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
                  break;

                default:
                  gcc_unreachable ();
                }

              if (GET_MODE (part[1][0]) == SImode)
                part[1][0] = part[1][1];
            }
        }
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose the correct order so we do not overwrite the source before
     it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
           || (nparts == 3
               && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
          && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      if (nparts == 3)
        {
          operands[2] = part[0][2];
          operands[3] = part[0][1];
          operands[4] = part[0][0];
          operands[5] = part[1][2];
          operands[6] = part[1][1];
          operands[7] = part[1][0];
        }
      else
        {
          operands[2] = part[0][1];
          operands[3] = part[0][0];
          operands[5] = part[1][1];
          operands[6] = part[1][0];
        }
    }
  else
    {
      if (nparts == 3)
        {
          operands[2] = part[0][0];
          operands[3] = part[0][1];
          operands[4] = part[0][2];
          operands[5] = part[1][0];
          operands[6] = part[1][1];
          operands[7] = part[1][2];
        }
      else
        {
          operands[2] = part[0][0];
          operands[3] = part[0][1];
          operands[5] = part[1][0];
          operands[6] = part[1][1];
        }
    }

  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
  if (optimize_size)
    {
      if (GET_CODE (operands[5]) == CONST_INT
          && operands[5] != const0_rtx
          && REG_P (operands[2]))
        {
          if (GET_CODE (operands[6]) == CONST_INT
              && INTVAL (operands[6]) == INTVAL (operands[5]))
            operands[6] = operands[2];

          if (nparts == 3
              && GET_CODE (operands[7]) == CONST_INT
              && INTVAL (operands[7]) == INTVAL (operands[5]))
            operands[7] = operands[2];
        }

      if (nparts == 3
          && GET_CODE (operands[6]) == CONST_INT
          && operands[6] != const0_rtx
          && REG_P (operands[3])
          && GET_CODE (operands[7]) == CONST_INT
          && INTVAL (operands[7]) == INTVAL (operands[6]))
        operands[7] = operands[3];
    }

  emit_move_insn (operands[2], operands[5]);
  emit_move_insn (operands[3], operands[6]);
  if (nparts == 3)
    emit_move_insn (operands[4], operands[7]);

  return;
}
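
/* Illustration only (not part of GCC): the ordering choice above is the
   usual rule for multi-word register moves -- copy first the word whose
   destination is not still needed as a source.  A two-word sketch over a
   hypothetical register array:  */
#if 0
static void
ordered_copy (unsigned int reg[], int d0, int d1, int s0, int s1)
{
  /* If the first destination is still needed as the second source,
     do the second word first, mirroring the REGNO check above.  */
  if (d0 == s1)
    {
      reg[d1] = reg[s1];
      reg[d0] = reg[s0];
    }
  else
    {
      reg[d0] = reg[s0];
      reg[d1] = reg[s1];
    }
}
#endif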

/* Helper function of ix86_split_ashl used to generate an SImode/DImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.  */

static void
ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
{
  if (count == 1)
    {
      emit_insn ((mode == DImode
                  ? gen_addsi3
                  : gen_adddi3) (operand, operand, operand));
    }
  else if (!optimize_size
           && count * ix86_cost->add <= ix86_cost->shift_const)
    {
      int i;
      for (i = 0; i < count; i++)
        {
          emit_insn ((mode == DImode
                      ? gen_addsi3
                      : gen_adddi3) (operand, operand, operand));
        }
    }
  else
    emit_insn ((mode == DImode
                ? gen_ashlsi3
                : gen_ashldi3) (operand, operand, GEN_INT (count)));
}
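
/* Illustration only (not part of GCC): MODE above is the double-word mode,
   so when splitting DImode the individual parts are SImode -- hence the
   seemingly inverted gen_addsi3 choice.  The add sequence works because
   each add doubles the value.  A C sketch of the equivalence:  */
#if 0
static unsigned int
shl_by_adds (unsigned int x, int count)
{
  int i;
  for (i = 0; i < count; i++)
    x += x;             /* x + x == x << 1, so the loop yields x << count */
  return x;
}
#endif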

void
ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx low[2], high[2];
  int count;
  const int single_width = mode == DImode ? 32 : 64;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      count = INTVAL (operands[2]) & (single_width * 2 - 1);

      if (count >= single_width)
        {
          emit_move_insn (high[0], low[1]);
          emit_move_insn (low[0], const0_rtx);

          if (count > single_width)
            ix86_expand_ashl_const (high[0], count - single_width, mode);
        }
      else
        {
          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);
          emit_insn ((mode == DImode
                     ? gen_x86_shld_1
                     : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
          ix86_expand_ashl_const (low[0], count, mode);
        }
      return;
    }

  (mode == DImode ? split_di : split_ti) (operands, 1, low, high);

  if (operands[1] == const1_rtx)
    {
      /* Assuming we've chosen QImode-capable registers, then 1 << N
         can be done with two 32/64-bit shifts, no branches, no cmoves.  */
      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
        {
          rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);

          ix86_expand_clear (low[0]);
          ix86_expand_clear (high[0]);
          emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));

          d = gen_lowpart (QImode, low[0]);
          d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
          s = gen_rtx_EQ (QImode, flags, const0_rtx);
          emit_insn (gen_rtx_SET (VOIDmode, d, s));

          d = gen_lowpart (QImode, high[0]);
          d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
          s = gen_rtx_NE (QImode, flags, const0_rtx);
          emit_insn (gen_rtx_SET (VOIDmode, d, s));
        }

      /* Otherwise, we can get the same results by manually performing
         a bit extract operation on bit 5/6, and then performing the two
         shifts.  The two methods of getting 0/1 into low/high are exactly
         the same size.  Avoiding the shift in the bit extract case helps
         pentium4 a bit; no one else seems to care much either way.  */
      else
        {
          rtx x;

          if (TARGET_PARTIAL_REG_STALL && !optimize_size)
            x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
          else
            x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
          emit_insn (gen_rtx_SET (VOIDmode, high[0], x));

          emit_insn ((mode == DImode
                      ? gen_lshrsi3
                      : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
          emit_insn ((mode == DImode
                      ? gen_andsi3
                      : gen_anddi3) (high[0], high[0], GEN_INT (1)));
          emit_move_insn (low[0], high[0]);
          emit_insn ((mode == DImode
                      ? gen_xorsi3
                      : gen_xordi3) (low[0], low[0], GEN_INT (1)));
        }

      emit_insn ((mode == DImode
                    ? gen_ashlsi3
                    : gen_ashldi3) (low[0], low[0], operands[2]));
      emit_insn ((mode == DImode
                    ? gen_ashlsi3
                    : gen_ashldi3) (high[0], high[0], operands[2]));
      return;
    }

  if (operands[1] == constm1_rtx)
    {
      /* For -1 << N, we can avoid the shld instruction, because we
         know that we're shifting 0...31/63 ones into a -1.  */
      emit_move_insn (low[0], constm1_rtx);
      if (optimize_size)
        emit_move_insn (high[0], low[0]);
      else
        emit_move_insn (high[0], constm1_rtx);
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
      emit_insn ((mode == DImode
                  ? gen_x86_shld_1
                  : gen_x86_64_shld) (high[0], low[0], operands[2]));
    }

  emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));

  if (TARGET_CMOVE && scratch)
    {
      ix86_expand_clear (scratch);
      emit_insn ((mode == DImode
                  ? gen_x86_shift_adj_1
                  : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
    }
  else
    emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
}
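
/* Illustration only (not part of GCC): the shld plus shift-adjust sequence
   above implements a 64-bit left shift out of 32-bit halves.  A C sketch
   for a variable count, including the count >= 32 fixup performed by the
   x86_shift_adj patterns:  */
#if 0
static void
dword_shl (unsigned int *lo, unsigned int *hi, unsigned int count)
{
  count &= 63;
  if (count >= 32)
    {
      *hi = *lo << (count - 32);   /* the low word shifts entirely into high */
      *lo = 0;
    }
  else if (count > 0)
    {
      *hi = (*hi << count) | (*lo >> (32 - count));   /* shld */
      *lo <<= count;
    }
}
#endif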

void
ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx low[2], high[2];
  int count;
  const int single_width = mode == DImode ? 32 : 64;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      count = INTVAL (operands[2]) & (single_width * 2 - 1);

      if (count == single_width * 2 - 1)
        {
          emit_move_insn (high[0], high[1]);
          emit_insn ((mode == DImode
                      ? gen_ashrsi3
                      : gen_ashrdi3) (high[0], high[0],
                                      GEN_INT (single_width - 1)));
          emit_move_insn (low[0], high[0]);
        }
      else if (count >= single_width)
        {
          emit_move_insn (low[0], high[1]);
          emit_move_insn (high[0], low[0]);
          emit_insn ((mode == DImode
                      ? gen_ashrsi3
                      : gen_ashrdi3) (high[0], high[0],
                                      GEN_INT (single_width - 1)));
          if (count > single_width)
            emit_insn ((mode == DImode
                        ? gen_ashrsi3
                        : gen_ashrdi3) (low[0], low[0],
                                        GEN_INT (count - single_width)));
        }
      else
        {
          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);
          emit_insn ((mode == DImode
                      ? gen_x86_shrd_1
                      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
          emit_insn ((mode == DImode
                      ? gen_ashrsi3
                      : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
        }
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);

      emit_insn ((mode == DImode
                  ? gen_x86_shrd_1
                  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
      emit_insn ((mode == DImode
                  ? gen_ashrsi3
                  : gen_ashrdi3) (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
        {
          emit_move_insn (scratch, high[0]);
          emit_insn ((mode == DImode
                      ? gen_ashrsi3
                      : gen_ashrdi3) (scratch, scratch,
                                      GEN_INT (single_width - 1)));
          emit_insn ((mode == DImode
                      ? gen_x86_shift_adj_1
                      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
                                               scratch));
        }
      else
        emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
    }
}
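
/* Illustration only (not part of GCC): for count == 63 in the DImode case
   the code above just broadcasts the sign bit into both words.  A C sketch
   of that special case over 32-bit halves:  */
#if 0
static void
dword_sar_63 (unsigned int *lo, unsigned int *hi)
{
  *hi = (unsigned int) ((int) *hi >> 31);   /* 0 or 0xffffffff */
  *lo = *hi;                                /* every bit is the old sign */
}
#endif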

void
ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx low[2], high[2];
  int count;
  const int single_width = mode == DImode ? 32 : 64;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      count = INTVAL (operands[2]) & (single_width * 2 - 1);

      if (count >= single_width)
        {
          emit_move_insn (low[0], high[1]);
          ix86_expand_clear (high[0]);

          if (count > single_width)
            emit_insn ((mode == DImode
                        ? gen_lshrsi3
                        : gen_lshrdi3) (low[0], low[0],
                                        GEN_INT (count - single_width)));
        }
      else
        {
          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);
          emit_insn ((mode == DImode
                      ? gen_x86_shrd_1
                      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
          emit_insn ((mode == DImode
                      ? gen_lshrsi3
                      : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
        }
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);

      emit_insn ((mode == DImode
                  ? gen_x86_shrd_1
                  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
      emit_insn ((mode == DImode
                  ? gen_lshrsi3
                  : gen_lshrdi3) (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && scratch)
        {
          ix86_expand_clear (scratch);
          emit_insn ((mode == DImode
                      ? gen_x86_shift_adj_1
                      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
                                               scratch));
        }
      else
        emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
    }
}

/* Helper function for the string operations below.  Test whether VARIABLE
   is aligned to VALUE bytes.  If so, jump to the label.  */
static rtx
ix86_expand_aligntest (rtx variable, int value)
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
                           1, label);
  return label;
}
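
/* Illustrative sketch (not part of the original source): the RTL emitted
   above behaves like this C fragment.  VALUE is always a single power of
   two here (1, 2 or 4), so the AND isolates one address bit.  */
#if 0
static int
aligntest_sketch (unsigned long variable, unsigned long value)
{
  /* Nonzero result means the tested bit is clear, i.e. the caller may
     skip the fix-up copy for this alignment step.  */
  return (variable & value) == 0;
}
#endif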

/* Adjust COUNTER by the VALUE.  */
static void
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  if (GET_MODE (countreg) == DImode)
    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
  else
    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
}

/* Zero extend EXP, which may be in SImode, to a Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  rtx r;
  if (GET_MODE (exp) == VOIDmode)
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
  return r;
}

/* Expand string move (memcpy) operation.  Use i386 string operations when
   profitable.  expand_clrmem contains similar code.  */
int
ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
{
  rtx srcreg, destreg, countreg, srcexp, destexp;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi or edi.  */
  if (global_regs[4] || global_regs[5])
    return 0;

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 64;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
        return 0;
    }

  /* Figure out the proper mode for the counter.  For 32 bits it is always
     SImode; for 64 bits use SImode when possible, otherwise DImode.
     Set count to the number of bytes copied when known at compile time.  */
  if (!TARGET_64BIT
      || GET_MODE (count_exp) == SImode
      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  gcc_assert (counter_mode == SImode || counter_mode == DImode);

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  if (destreg != XEXP (dst, 0))
    dst = replace_equiv_address_nv (dst, destreg);
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
  if (srcreg != XEXP (src, 0))
    src = replace_equiv_address_nv (src, srcreg);

  /* When optimizing for size, emit a simple rep ; movsb instruction for
     counts not divisible by 4, except when a (movsl;)*(movsw;)?(movsb;)?
     sequence is shorter than mov{b,l} $count, %{cl,ecx}; rep; movsb.
     The size of the (movsl;)*(movsw;)?(movsb;)? sequence is
     count / 4 + (count & 3); the other sequence is either 4 or 7 bytes,
     but we don't know whether the upper 24 (resp. 56) bits of %ecx will
     be known to be zero or not.  The rep; movsb sequence causes higher
     register pressure though, so take that into account.  */

  if ((!optimize || optimize_size)
      && (count == 0
          || ((count & 0x03)
              && (!optimize_size
                  || count > 5 * 4
                  || (count & 3) + count / 4 > 6))))
    {
      emit_insn (gen_cld ());
      countreg = ix86_zero_extend_to_Pmode (count_exp);
      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
      srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
      emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
                              destexp, srcexp));
    }
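
/* Illustrative sketch (not part of the original source): the size
   heuristic just above chooses rep; movsb roughly as follows.  The byte
   counts follow the comment in the code; this drops the optimize_size
   conditions to show only the size arithmetic.  */
#if 0
static int
use_rep_movsb_sketch (unsigned long count)
{
  /* Take rep; movsb when the unrolled movsl/movsw/movsb expansion,
     which costs count / 4 + (count & 3) bytes, would not beat the
     (at most) 7-byte mov $count, %ecx; rep; movsb form.  */
  return (count & 3) != 0
         && (count > 5 * 4 || (count & 3) + count / 4 > 6);
}
#endif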

  /* For constant aligned (or small unaligned) copies use rep movsl
     followed by code copying the rest.  For PentiumPro ensure 8 byte
     alignment to allow rep movsl acceleration.  */

  else if (count != 0
           && (align >= 8
               || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
               || optimize_size || count < (unsigned int) 64))
    {
      unsigned HOST_WIDE_INT offset = 0;
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      rtx srcmem, dstmem;

      emit_insn (gen_cld ());
      if (count & ~(size - 1))
        {
          if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
            {
              enum machine_mode movs_mode = size == 4 ? SImode : DImode;

              while (offset < (count & ~(size - 1)))
                {
                  srcmem = adjust_automodify_address_nv (src, movs_mode,
                                                         srcreg, offset);
                  dstmem = adjust_automodify_address_nv (dst, movs_mode,
                                                         destreg, offset);
                  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
                  offset += size;
                }
            }
          else
            {
              countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
                                  & (TARGET_64BIT ? -1 : 0x3fffffff));
              countreg = copy_to_mode_reg (counter_mode, countreg);
              countreg = ix86_zero_extend_to_Pmode (countreg);

              destexp = gen_rtx_ASHIFT (Pmode, countreg,
                                        GEN_INT (size == 4 ? 2 : 3));
              srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
              destexp = gen_rtx_PLUS (Pmode, destexp, destreg);

              emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
                                      countreg, destexp, srcexp));
              offset = count & ~(size - 1);
            }
        }
      if (size == 8 && (count & 0x04))
        {
          srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
                                                 offset);
          dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
                                                 offset);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          offset += 4;
        }
      if (count & 0x02)
        {
          srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
                                                 offset);
          dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
                                                 offset);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          offset += 2;
        }
      if (count & 0x01)
        {
          srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
                                                 offset);
          dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
                                                 offset);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
        }
    }
  /* The generic code based on the glibc implementation:
     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
     allowing accelerated copying there)
     - copy the data using rep movsl
     - copy the rest.  */
  else
    {
      rtx countreg2;
      rtx label = NULL;
      rtx srcmem, dstmem;
      int desired_alignment = (TARGET_PENTIUMPRO
                               && (count == 0 || count >= (unsigned int) 260)
                               ? 8 : UNITS_PER_WORD);
      /* Get rid of MEM_OFFSETs, they won't be accurate.  */
      dst = change_address (dst, BLKmode, destreg);
      src = change_address (src, BLKmode, srcreg);

      /* In case we don't know anything about the alignment, default to
         the library version, since it is usually equally fast and results
         in shorter code.

         Also emit a call when we know that the count is large and call
         overhead will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
          && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
        return 0;

      if (TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);

      /* We don't use loops to align the destination and to copy parts
         smaller than 4 bytes, because gcc is able to optimize such code
         better (in the case the destination or the count really is
         aligned, gcc is often able to predict the branches) and also it
         is friendlier to the hardware branch prediction.

         Using loops is beneficial for the generic case, because we can
         handle small counts using the loops.  Many CPUs (such as Athlon)
         have large REP prefix setup costs.

         This is quite costly.  Maybe we can revisit this decision later or
         add some customizability to this code.  */

      if (count == 0 && align < desired_alignment)
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
                                   LEU, 0, counter_mode, 1, label);
        }
      if (align <= 1)
        {
          rtx label = ix86_expand_aligntest (destreg, 1);
          srcmem = change_address (src, QImode, srcreg);
          dstmem = change_address (dst, QImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          ix86_adjust_counter (countreg, 1);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align <= 2)
        {
          rtx label = ix86_expand_aligntest (destreg, 2);
          srcmem = change_address (src, HImode, srcreg);
          dstmem = change_address (dst, HImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          ix86_adjust_counter (countreg, 2);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align <= 4 && desired_alignment > 4)
        {
          rtx label = ix86_expand_aligntest (destreg, 4);
          srcmem = change_address (src, SImode, srcreg);
          dstmem = change_address (dst, SImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          ix86_adjust_counter (countreg, 4);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }

      if (label && desired_alignment > 4 && !TARGET_64BIT)
        {
          emit_label (label);
          LABEL_NUSES (label) = 1;
          label = NULL_RTX;
        }
      if (!TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());
      if (TARGET_64BIT)
        {
          emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
                                  GEN_INT (3)));
          destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
        }
      else
        {
          emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
          destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
        }
      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
                              countreg2, destexp, srcexp));

      if (label)
        {
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
        {
          srcmem = change_address (src, SImode, srcreg);
          dstmem = change_address (dst, SImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
        }
      if ((align <= 4 || count == 0) && TARGET_64BIT)
        {
          rtx label = ix86_expand_aligntest (countreg, 4);
          srcmem = change_address (src, SImode, srcreg);
          dstmem = change_address (dst, SImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 2 && count != 0 && (count & 2))
        {
          srcmem = change_address (src, HImode, srcreg);
          dstmem = change_address (dst, HImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
        }
      if (align <= 2 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 2);
          srcmem = change_address (src, HImode, srcreg);
          dstmem = change_address (dst, HImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 1 && count != 0 && (count & 1))
        {
          srcmem = change_address (src, QImode, srcreg);
          dstmem = change_address (dst, QImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
        }
      if (align <= 1 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 1);
          srcmem = change_address (src, QImode, srcreg);
          dstmem = change_address (dst, QImode, destreg);
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }

  return 1;
}

/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  expand_movmem contains similar code.  */
int
ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
{
  rtx destreg, zeroreg, countreg, destexp;
  enum machine_mode counter_mode;
  HOST_WIDE_INT align = 0;
  unsigned HOST_WIDE_INT count = 0;

  if (GET_CODE (align_exp) == CONST_INT)
    align = INTVAL (align_exp);

  /* Can't use any of this if the user has appropriated esi.  */
  if (global_regs[4])
    return 0;

  /* This simple hack avoids all inlining code and simplifies code below.  */
  if (!TARGET_ALIGN_STRINGOPS)
    align = 32;

  if (GET_CODE (count_exp) == CONST_INT)
    {
      count = INTVAL (count_exp);
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
        return 0;
    }
  /* Figure out the proper mode for the counter.  For 32 bits it is always
     SImode; for 64 bits use SImode when possible, otherwise DImode.
     Set count to the number of bytes cleared when known at compile time.  */
  if (!TARGET_64BIT
      || GET_MODE (count_exp) == SImode
      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
    counter_mode = SImode;
  else
    counter_mode = DImode;

  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  if (destreg != XEXP (dst, 0))
    dst = replace_equiv_address_nv (dst, destreg);

  /* When optimizing for size, emit a simple rep ; stosb instruction for
     counts not divisible by 4.  The movl $N, %ecx; rep; stosb
     sequence is 7 bytes long, so if optimizing for size and count is
     small enough that some stosl, stosw and stosb instructions without
     rep are shorter, fall back into the next if.  */

  if ((!optimize || optimize_size)
      && (count == 0
          || ((count & 0x03)
              && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
    {
      emit_insn (gen_cld ());

      countreg = ix86_zero_extend_to_Pmode (count_exp);
      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
    }
  else if (count != 0
           && (align >= 8
               || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
               || optimize_size || count < (unsigned int) 64))
    {
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
      unsigned HOST_WIDE_INT offset = 0;

      emit_insn (gen_cld ());

      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
      if (count & ~(size - 1))
        {
          unsigned HOST_WIDE_INT repcount;
          unsigned int max_nonrep;

          repcount = count >> (size == 4 ? 2 : 3);
          if (!TARGET_64BIT)
            repcount &= 0x3fffffff;

          /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
             movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
             bytes.  In both cases the latter seems to be faster for small
             values of N.  */
          max_nonrep = size == 4 ? 7 : 4;
          if (!optimize_size)
            switch (ix86_tune)
              {
              case PROCESSOR_PENTIUM4:
              case PROCESSOR_NOCONA:
                max_nonrep = 3;
                break;
              default:
                break;
              }

          if (repcount <= max_nonrep)
            while (repcount-- > 0)
              {
                rtx mem = adjust_automodify_address_nv (dst,
                                                        GET_MODE (zeroreg),
                                                        destreg, offset);
                emit_insn (gen_strset (destreg, mem, zeroreg));
                offset += size;
              }
          else
            {
              countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
              countreg = ix86_zero_extend_to_Pmode (countreg);
              destexp = gen_rtx_ASHIFT (Pmode, countreg,
                                        GEN_INT (size == 4 ? 2 : 3));
              destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
              emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
                                       destexp));
              offset = count & ~(size - 1);
            }
        }
      if (size == 8 && (count & 0x04))
        {
          rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
                                                  offset);
          emit_insn (gen_strset (destreg, mem,
                                 gen_rtx_SUBREG (SImode, zeroreg, 0)));
          offset += 4;
        }
      if (count & 0x02)
        {
          rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
                                                  offset);
          emit_insn (gen_strset (destreg, mem,
                                 gen_rtx_SUBREG (HImode, zeroreg, 0)));
          offset += 2;
        }
      if (count & 0x01)
        {
          rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
                                                  offset);
          emit_insn (gen_strset (destreg, mem,
                                 gen_rtx_SUBREG (QImode, zeroreg, 0)));
        }
    }
  else
    {
      rtx countreg2;
      rtx label = NULL;
      /* Compute desired alignment of the string operation.  */
      int desired_alignment = (TARGET_PENTIUMPRO
                               && (count == 0 || count >= (unsigned int) 260)
                               ? 8 : UNITS_PER_WORD);

      /* In case we don't know anything about the alignment, default to
         the library version, since it is usually equally fast and results
         in shorter code.

         Also emit a call when we know that the count is large and call
         overhead will not be important.  */
      if (!TARGET_INLINE_ALL_STRINGOPS
          && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
        return 0;

      if (TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());

      countreg2 = gen_reg_rtx (Pmode);
      countreg = copy_to_mode_reg (counter_mode, count_exp);
      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
      /* Get rid of MEM_OFFSET, it won't be accurate.  */
      dst = change_address (dst, BLKmode, destreg);

      if (count == 0 && align < desired_alignment)
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
                                   LEU, 0, counter_mode, 1, label);
        }
      if (align <= 1)
        {
          rtx label = ix86_expand_aligntest (destreg, 1);
          emit_insn (gen_strset (destreg, dst,
                                 gen_rtx_SUBREG (QImode, zeroreg, 0)));
          ix86_adjust_counter (countreg, 1);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align <= 2)
        {
          rtx label = ix86_expand_aligntest (destreg, 2);
          emit_insn (gen_strset (destreg, dst,
                                 gen_rtx_SUBREG (HImode, zeroreg, 0)));
          ix86_adjust_counter (countreg, 2);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align <= 4 && desired_alignment > 4)
        {
          rtx label = ix86_expand_aligntest (destreg, 4);
          emit_insn (gen_strset (destreg, dst,
                                 (TARGET_64BIT
                                  ? gen_rtx_SUBREG (SImode, zeroreg, 0)
                                  : zeroreg)));
          ix86_adjust_counter (countreg, 4);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }

      if (label && desired_alignment > 4 && !TARGET_64BIT)
        {
          emit_label (label);
          LABEL_NUSES (label) = 1;
          label = NULL_RTX;
        }

      if (!TARGET_SINGLE_STRINGOP)
        emit_insn (gen_cld ());
      if (TARGET_64BIT)
        {
          emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
                                  GEN_INT (3)));
          destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
        }
      else
        {
          emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
          destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
        }
      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
      emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));

      if (label)
        {
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }

      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
        emit_insn (gen_strset (destreg, dst,
                               gen_rtx_SUBREG (SImode, zeroreg, 0)));
      if (TARGET_64BIT && (align <= 4 || count == 0))
        {
          rtx label = ix86_expand_aligntest (countreg, 4);
          emit_insn (gen_strset (destreg, dst,
                                 gen_rtx_SUBREG (SImode, zeroreg, 0)));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 2 && count != 0 && (count & 2))
        emit_insn (gen_strset (destreg, dst,
                               gen_rtx_SUBREG (HImode, zeroreg, 0)));
      if (align <= 2 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 2);
          emit_insn (gen_strset (destreg, dst,
                                 gen_rtx_SUBREG (HImode, zeroreg, 0)));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (align > 1 && count != 0 && (count & 1))
        emit_insn (gen_strset (destreg, dst,
                               gen_rtx_SUBREG (QImode, zeroreg, 0)));
      if (align <= 1 || count == 0)
        {
          rtx label = ix86_expand_aligntest (countreg, 1);
          emit_insn (gen_strset (destreg, dst,
                                 gen_rtx_SUBREG (QImode, zeroreg, 0)));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }
  return 1;
}
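
/* Illustrative sketch (not part of the original source): the max_nonrep
   heuristic in the clearing code above weighs the encoded size of the two
   sequences roughly as follows.  The byte counts come from the comment in
   the code: stosl is 1 byte and stosq is 2 bytes (REX prefix).  */
#if 0
static int
use_unrolled_stos_sketch (unsigned long repcount, int word_size)
{
  /* N x stosl costs N bytes vs. a 7-byte rep form; N x stosq costs
     2 x N bytes vs. an 8-byte rep form, hence the 7 / 4 thresholds.  */
  unsigned int max_nonrep = word_size == 4 ? 7 : 4;
  return repcount <= max_nonrep;
}
#endif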

/* Expand strlen.  */
int
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of the strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_size
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
    return 0;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_size)
    {
      /* Well, it seems that some optimizer does not combine a call like
         foo(strlen(bar), strlen(bar));
         when the move and the subtraction are done here.  It does calculate
         the length just once when these instructions are done inside of
         output_strlen_unroll().  But since &bar[strlen(bar)] is often used
         and this uses one fewer register for the lifetime of
         output_strlen_unroll(), this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, src, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
         the string, like memchr(), so compute the length by subtracting
         the start address.  */
      if (TARGET_64BIT)
        emit_insn (gen_subdi3 (out, out, addr));
      else
        emit_insn (gen_subsi3 (out, out, addr));
    }
  else
    {
      rtx unspec;
      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      emit_insn (gen_cld ());
      src = replace_equiv_address_nv (src, scratch3);

      /* If .md starts supporting :P, this can be done in .md.  */
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
                                                 scratch4), UNSPEC_SCAS);
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      if (TARGET_64BIT)
        {
          emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
          emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
        }
      else
        {
          emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
          emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
        }
    }
  return 1;
}
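
/* Illustrative sketch (not part of the original source): the one_cmpl/add
   pair emitted at the end of the repnz; scasb path above recovers the
   length as follows, modeling 32-bit %ecx with unsigned int.  */
#if 0
static unsigned int
strlen_from_scas_sketch (unsigned int ecx_after_scas)
{
  /* With %ecx preloaded to -1, repnz scasb runs n + 1 iterations for a
     string of length n and leaves %ecx == -(n + 2); complementing gives
     n + 1, and adding -1 yields n.  */
  return ~ecx_after_scas - 1;
}
#endif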

/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
        not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above
   and some address computation at the end.  These things are done in
   i386.md.  */

static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
        {
          align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
          align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

          /* Leave just the 3 lower bits.  */
          align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
                                   Pmode, 1, align_2_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
                                   Pmode, 1, align_3_label);
        }
      else
        {
          /* Since the alignment is 2, we have to check 2 or 0 bytes;
             check whether it is aligned to a 4-byte boundary.  */

          align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
        }

      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
                               QImode, 1, end_0_label);

      /* Increment the address.  */
      if (TARGET_64BIT)
        emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
        emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2.  */
      if (align != 2)
        {
          emit_label (align_2_label);

          emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                                   end_0_label);

          if (TARGET_64BIT)
            emit_insn (gen_adddi3 (out, out, const1_rtx));
          else
            emit_insn (gen_addsi3 (out, out, const1_rtx));

          emit_label (align_3_label);
        }

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                               end_0_label);

      if (TARGET_64BIT)
        emit_insn (gen_adddi3 (out, out, const1_rtx));
      else
        emit_insn (gen_addsi3 (out, out, const1_rtx));
    }

  /* Generate a loop to check 4 bytes at a time.  It is not a good idea
     to align this loop; it only makes the program larger and does not
     speed it up.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  if (TARGET_64BIT)
    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
  else
    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside the loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
                         gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
                           align_4_label);
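
  /* Illustrative sketch (not part of the original source): the four insns
     emitted above compute the classic "does this word contain a zero
     byte" test; a nonzero result means the 32-bit word w has a zero byte.

     #if 0
     static unsigned int
     haszero_sketch (unsigned int w)
     {
       // Subtracting 1 from each byte borrows into bit 7 exactly when
       // the byte was zero; ~w masks out bytes that were 0x80 or higher.
       return (w - 0x01010101u) & ~w & 0x80808080u;
     }
     #endif
  */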

  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
                              gen_rtx_IF_THEN_ELSE (SImode, tmp,
                                                    reg,
                                                    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
                              gen_rtx_PLUS (Pmode, out, const2_rtx)));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
                              gen_rtx_IF_THEN_ELSE (Pmode, tmp,
                                                    reg2,
                                                    out)));
    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
                                  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      if (TARGET_64BIT)
        emit_insn (gen_adddi3 (out, out, const2_rtx));
      else
        emit_insn (gen_addsi3 (out, out, const2_rtx));

      emit_label (end_2_label);
    }

  /* Avoid a branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
  if (TARGET_64BIT)
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
  else
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));

  emit_label (end_0_label);
}

void
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
                  rtx callarg2 ATTRIBUTE_UNUSED,
                  rtx pop, int sibcall)
{
  rtx use = NULL, call;

  if (pop == const0_rtx)
    pop = NULL;
  gcc_assert (!TARGET_64BIT || !pop);

#if TARGET_MACHO
  if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
    fnaddr = machopic_indirect_call_target (fnaddr);
#else
  /* Static functions and indirect calls don't need the pic register.  */
  if (! TARGET_64BIT && flag_pic
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
    use_reg (&use, pic_offset_table_rtx);

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, 0);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }
#endif /* TARGET_MACHO */

  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
    {
      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }
  if (sibcall && TARGET_64BIT
      && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
    {
      rtx addr;
      addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
      emit_move_insn (fnaddr, addr);
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
    }

  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
    }

  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;
}

/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status (void)
{
  struct machine_function *f;

  f = ggc_alloc_cleared (sizeof (struct machine_function));
  f->use_fast_prologue_epilogue_nregs = -1;

  return f;
}

/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
{
  struct stack_local_entry *s;

  gcc_assert (n < MAX_386_STACK_LOCALS);

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return s->rtl;

  s = (struct stack_local_entry *)
    ggc_alloc (sizeof (struct stack_local_entry));
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return s->rtl;
}

/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

rtx
ix86_tls_get_addr (void)
{
  if (!ix86_tls_symbol)
    {
      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
                                            (TARGET_GNU_TLS && !TARGET_64BIT)
                                            ? "___tls_get_addr"
                                            : "__tls_get_addr");
    }

  return ix86_tls_symbol;
}

/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */

int
memory_address_length (rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
         the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
          || addr == arg_pointer_rtx
          || addr == frame_pointer_rtx
          || addr == hard_frame_pointer_rtx)
        len = 1;
    }

  /* Direct Addressing.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
        {
          if (GET_CODE (disp) == CONST_INT
              && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
              && base)
            len = 1;
          else
            len = 4;
        }
      /* ebp always wants a displacement.  */
      else if (base == hard_frame_pointer_rtx)
        len = 1;

      /* An index requires the two-byte modrm form....  */
      if (index
          /* ...like esp, which always wants an index.  */
          || base == stack_pointer_rtx
          || base == arg_pointer_rtx
          || base == frame_pointer_rtx)
        len += 1;
    }

  return len;
}
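
/* Illustrative sketch (not part of the original source): the decision
   tree above reduces to the following, where the boolean parameters are
   hypothetical stand-ins for the rtx tests (disp8_ok corresponds to the
   'K' constraint, i.e. a signed 8-bit displacement with a base).  */
#if 0
static int
addr_len_sketch (int has_base, int has_index, int has_disp, int disp8_ok,
                 int base_is_esp, int base_is_ebp)
{
  int len = 0;
  if (has_base && !has_index && !has_disp)
    return base_is_esp || base_is_ebp ? 1 : 0;  /* register indirect */
  if (has_disp && !has_base && !has_index)
    return 4;                                   /* direct addressing */
  if (has_disp)
    len = (disp8_ok && has_base) ? 1 : 4;       /* disp8 vs. disp32 */
  else if (base_is_ebp)
    len = 1;                                    /* ebp needs a disp */
  if (has_index || base_is_esp)
    len += 1;                                   /* SIB byte */
  return len;
}
#endif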

/* Compute the default value for the "length_immediate" attribute.  When
   SHORTFORM is set, expect that the insn has an 8-bit immediate
   alternative.  */
int
ix86_attr_length_immediate_default (rtx insn, int shortform)
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
        gcc_assert (!len);
        if (shortform
            && GET_CODE (recog_data.operand[i]) == CONST_INT
            && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
          len = 1;
        else
          {
            switch (get_attr_mode (insn))
              {
              case MODE_QI:
                len += 1;
                break;
              case MODE_HI:
                len += 2;
                break;
              case MODE_SI:
                len += 4;
                break;
              /* Immediates for DImode instructions are encoded as
                 32-bit sign-extended values.  */
              case MODE_DI:
                len += 4;
                break;
              default:
                fatal_insn ("unknown insn mode", insn);
              }
          }
      }
  return len;
}
/* Compute the default value for the "length_address" attribute.  */
int
ix86_attr_length_address_default (rtx insn)
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn);

      if (GET_CODE (set) == PARALLEL)
        set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      return memory_address_length (SET_SRC (set));
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (GET_CODE (recog_data.operand[i]) == MEM)
      return memory_address_length (XEXP (recog_data.operand[i], 0));
  return 0;
}

/* Return the maximum number of instructions a cpu can issue.  */

static int
ix86_issue_rate (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_K6:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_NOCONA:
      return 3;

    default:
      return 1;
    }
}

/* A subroutine of ix86_adjust_cost -- return true iff INSN reads the
   flags set by DEP_INSN and nothing else that DEP_INSN sets.  */

static int
ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return 0;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
           && XVECLEN (PATTERN (dep_insn), 0) == 2
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return 0;

  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
    return 0;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return 0;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return 0;

  return 1;
}

/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
   address with operands set by DEP_INSN.  */

static int
ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx addr;

  if (insn_type == TYPE_LEA
      && TARGET_PENTIUM)
    {
      addr = PATTERN (insn);

      if (GET_CODE (addr) == PARALLEL)
        addr = XVECEXP (addr, 0, 0);

      gcc_assert (GET_CODE (addr) == SET);

      addr = SET_SRC (addr);
    }
  else
    {
      int i;
      extract_insn_cached (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
        if (GET_CODE (recog_data.operand[i]) == MEM)
          {
            addr = XEXP (recog_data.operand[i], 0);
            goto found;
          }
      return 0;
    found:;
    }

  return modified_in_p (addr, dep_insn);
}

static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (ix86_agi_dependant (insn, dep_insn, insn_type))
        cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependant (insn, dep_insn, insn_type))
        cost = 0;

      /* Floating point stores require the value to be ready one cycle
         earlier.  */
      if (insn_type == TYPE_FMOV
          && get_attr_memory (insn) == MEMORY_STORE
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
        cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
        cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
          && (set = single_set (dep_insn)) != NULL_RTX
          && (set2 = single_set (insn)) != NULL_RTX
          && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
          && GET_CODE (SET_DEST (set2)) == MEM)
        cost += 1;

      /* The reorder buffer can hide the latency of a load by executing
         it in parallel with the previous instruction, provided the
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
        {
          /* Claim that moves take one cycle, as the core can issue one
             load at a time and the next load can start a cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
            cost = 1;
          else if (cost > 1)
            cost--;
        }
      break;

    case PROCESSOR_K6:
      memory = get_attr_memory (insn);

      /* The esp dependency is resolved before the instruction is really
         finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
          && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
        return 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
        cost += 5;

      /* The reorder buffer can hide the latency of a load by executing
         it in parallel with the previous instruction, provided the
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
        {
          /* Claim that moves take one cycle, as the core can issue one
             load at a time and the next load can start a cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
            cost = 1;
          else if (cost > 2)
            cost -= 2;
          else
            cost = 1;
        }
      break;

    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
      memory = get_attr_memory (insn);

      /* The reorder buffer can hide the latency of a load by executing
         it in parallel with the previous instruction, provided the
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
        {
          enum attr_unit unit = get_attr_unit (insn);
          int loadcost = 3;

          /* Because of the difference between the length of the integer
             and floating unit pipeline preparation stages, the memory
             operands for floating point are cheaper.

             ??? For Athlon the difference is most probably 2.  */
          if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
            loadcost = 3;
          else
            loadcost = TARGET_ATHLON ? 2 : 0;

          if (cost >= loadcost)
            cost -= loadcost;
          else
            cost = 0;
        }
      break;

    default:
      break;
    }

  return cost;
}

/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */

static int
ia32_multipass_dfa_lookahead (void)
{
  if (ix86_tune == PROCESSOR_PENTIUM)
    return 2;

  if (ix86_tune == PROCESSOR_PENTIUMPRO
      || ix86_tune == PROCESSOR_K6)
    return 1;

  return 0;
}

/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
        return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
        return 128;
    }
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
           && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}

/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align)
{
  if (AGGREGATE_TYPE_P (type)
       && TYPE_SIZE (type)
       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
       && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
           || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
    return 256;

  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
           && TYPE_SIZE (type)
           && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
           && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
               || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
        return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
        return 64;
      if (TYPE_MODE (type) == XCmode && align < 128)
        return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
            || TREE_CODE (type) == UNION_TYPE
            || TREE_CODE (type) == QUAL_UNION_TYPE)
           && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
           || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
        return 128;
    }

  return align;
}
13365
 
13366
/* Compute the alignment for a local variable.
13367
   TYPE is the data type, and ALIGN is the alignment that
13368
   the object would ordinarily have.  The value of this macro is used
13369
   instead of that alignment to align the object.  */
13370
 
13371
int
13372
ix86_local_alignment (tree type, int align)
13373
{
13374
  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13375
     to 16byte boundary.  */
13376
  if (TARGET_64BIT)
13377
    {
13378
      if (AGGREGATE_TYPE_P (type)
13379
           && TYPE_SIZE (type)
13380
           && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13381
           && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
13382
               || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
13383
        return 128;
13384
    }
13385
  if (TREE_CODE (type) == ARRAY_TYPE)
13386
    {
13387
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13388
        return 64;
13389
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
13390
        return 128;
13391
    }
13392
  else if (TREE_CODE (type) == COMPLEX_TYPE)
13393
    {
13394
      if (TYPE_MODE (type) == DCmode && align < 64)
13395
        return 64;
13396
      if (TYPE_MODE (type) == XCmode && align < 128)
13397
        return 128;
13398
    }
13399
  else if ((TREE_CODE (type) == RECORD_TYPE
13400
            || TREE_CODE (type) == UNION_TYPE
13401
            || TREE_CODE (type) == QUAL_UNION_TYPE)
13402
           && TYPE_FIELDS (type))
13403
    {
13404
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
13405
        return 64;
13406
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
13407
        return 128;
13408
    }
13409
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
13410
           || TREE_CODE (type) == INTEGER_TYPE)
13411
    {
13412
 
13413
      if (TYPE_MODE (type) == DFmode && align < 64)
13414
        return 64;
13415
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
13416
        return 128;
13417
    }
13418
  return align;
13419
}
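
/* Illustration added for exposition (not part of the original file): the
   two aggregate-size tests at the top of ix86_data_alignment, restated for
   the common case where TYPE_SIZE fits in a host integer.  "size_bits"
   stands in for TREE_INT_CST_LOW (TYPE_SIZE (type)); the function name is
   hypothetical.  */
static int
data_alignment_sketch (int size_bits, int align, int target_64bit)
{
  /* Aggregates of 256 bits (32 bytes) or more are raised to 256-bit
     alignment.  */
  if (size_bits >= 256 && align < 256)
    return 256;
  /* On x86-64, aggregates of 128 bits or more get the ABI-mandated
     16-byte (128-bit) alignment.  */
  if (target_64bit && size_bits >= 128 && align < 128)
    return 128;
  return align;
}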

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */
void
x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  if (!TARGET_64BIT)
    {
      /* Compute offset from the end of the jmp to the target function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
                               plus_constant (tramp, 10),
                               NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
                      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
                      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
    }
  else
    {
      int offset = 0;
      /* Try to load the address using the shorter movl instead of movabs.
         We may want to support movq for kernel mode, but the kernel does
         not use trampolines at the moment.  */
      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
        {
          fnaddr = copy_to_mode_reg (DImode, fnaddr);
          emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                          gen_int_mode (0xbb41, HImode));
          emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
                          gen_lowpart (SImode, fnaddr));
          offset += 6;
        }
      else
        {
          emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                          gen_int_mode (0xbb49, HImode));
          emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
                          fnaddr);
          offset += 10;
        }
      /* Load static chain using movabs to r10.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
                      cxt);
      offset += 10;
      /* Jump to r11.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset + 2)),
                      gen_int_mode (0xe3, QImode));
      offset += 3;
      gcc_assert (offset <= TRAMPOLINE_SIZE);
    }

#ifdef ENABLE_EXECUTE_STACK
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
                     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
#endif
}
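
/* Illustration added for exposition (not part of the original file): the
   10-byte ia32 trampoline the !TARGET_64BIT branch above emits, built by
   hand in plain C.  Byte 0 is 0xb9 ("movl $cxt, %ecx"), bytes 1-4 hold the
   static chain, byte 5 is 0xe9 ("jmp rel32"), and bytes 6-9 hold the
   displacement measured from the end of the jmp, i.e. from tramp + 10.
   The function name is hypothetical.  */
static void
ia32_trampoline_sketch (unsigned char tramp[10],
                        unsigned long fnaddr, unsigned long cxt)
{
  unsigned long disp = fnaddr - ((unsigned long) tramp + 10);
  int i;

  tramp[0] = 0xb9;                      /* movl $imm32, %ecx */
  for (i = 0; i < 4; i++)               /* little-endian imm32 = cxt */
    tramp[1 + i] = (cxt >> (8 * i)) & 0xff;
  tramp[5] = 0xe9;                      /* jmp rel32 */
  for (i = 0; i < 4; i++)               /* little-endian rel32 = disp */
    tramp[6 + i] = (disp >> (8 * i)) & 0xff;
}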

/* Codes for all the SSE/MMX builtins.  */
enum ix86_builtins
{
  IX86_BUILTIN_ADDPS,
  IX86_BUILTIN_ADDSS,
  IX86_BUILTIN_DIVPS,
  IX86_BUILTIN_DIVSS,
  IX86_BUILTIN_MULPS,
  IX86_BUILTIN_MULSS,
  IX86_BUILTIN_SUBPS,
  IX86_BUILTIN_SUBSS,

  IX86_BUILTIN_CMPEQPS,
  IX86_BUILTIN_CMPLTPS,
  IX86_BUILTIN_CMPLEPS,
  IX86_BUILTIN_CMPGTPS,
  IX86_BUILTIN_CMPGEPS,
  IX86_BUILTIN_CMPNEQPS,
  IX86_BUILTIN_CMPNLTPS,
  IX86_BUILTIN_CMPNLEPS,
  IX86_BUILTIN_CMPNGTPS,
  IX86_BUILTIN_CMPNGEPS,
  IX86_BUILTIN_CMPORDPS,
  IX86_BUILTIN_CMPUNORDPS,
  IX86_BUILTIN_CMPNEPS,
  IX86_BUILTIN_CMPEQSS,
  IX86_BUILTIN_CMPLTSS,
  IX86_BUILTIN_CMPLESS,
  IX86_BUILTIN_CMPNEQSS,
  IX86_BUILTIN_CMPNLTSS,
  IX86_BUILTIN_CMPNLESS,
  IX86_BUILTIN_CMPNGTSS,
  IX86_BUILTIN_CMPNGESS,
  IX86_BUILTIN_CMPORDSS,
  IX86_BUILTIN_CMPUNORDSS,
  IX86_BUILTIN_CMPNESS,

  IX86_BUILTIN_COMIEQSS,
  IX86_BUILTIN_COMILTSS,
  IX86_BUILTIN_COMILESS,
  IX86_BUILTIN_COMIGTSS,
  IX86_BUILTIN_COMIGESS,
  IX86_BUILTIN_COMINEQSS,
  IX86_BUILTIN_UCOMIEQSS,
  IX86_BUILTIN_UCOMILTSS,
  IX86_BUILTIN_UCOMILESS,
  IX86_BUILTIN_UCOMIGTSS,
  IX86_BUILTIN_UCOMIGESS,
  IX86_BUILTIN_UCOMINEQSS,

  IX86_BUILTIN_CVTPI2PS,
  IX86_BUILTIN_CVTPS2PI,
  IX86_BUILTIN_CVTSI2SS,
  IX86_BUILTIN_CVTSI642SS,
  IX86_BUILTIN_CVTSS2SI,
  IX86_BUILTIN_CVTSS2SI64,
  IX86_BUILTIN_CVTTPS2PI,
  IX86_BUILTIN_CVTTSS2SI,
  IX86_BUILTIN_CVTTSS2SI64,

  IX86_BUILTIN_MAXPS,
  IX86_BUILTIN_MAXSS,
  IX86_BUILTIN_MINPS,
  IX86_BUILTIN_MINSS,

  IX86_BUILTIN_LOADUPS,
  IX86_BUILTIN_STOREUPS,
  IX86_BUILTIN_MOVSS,

  IX86_BUILTIN_MOVHLPS,
  IX86_BUILTIN_MOVLHPS,
  IX86_BUILTIN_LOADHPS,
  IX86_BUILTIN_LOADLPS,
  IX86_BUILTIN_STOREHPS,
  IX86_BUILTIN_STORELPS,

  IX86_BUILTIN_MASKMOVQ,
  IX86_BUILTIN_MOVMSKPS,
  IX86_BUILTIN_PMOVMSKB,

  IX86_BUILTIN_MOVNTPS,
  IX86_BUILTIN_MOVNTQ,

  IX86_BUILTIN_LOADDQU,
  IX86_BUILTIN_STOREDQU,

  IX86_BUILTIN_PACKSSWB,
  IX86_BUILTIN_PACKSSDW,
  IX86_BUILTIN_PACKUSWB,

  IX86_BUILTIN_PADDB,
  IX86_BUILTIN_PADDW,
  IX86_BUILTIN_PADDD,
  IX86_BUILTIN_PADDQ,
  IX86_BUILTIN_PADDSB,
  IX86_BUILTIN_PADDSW,
  IX86_BUILTIN_PADDUSB,
  IX86_BUILTIN_PADDUSW,
  IX86_BUILTIN_PSUBB,
  IX86_BUILTIN_PSUBW,
  IX86_BUILTIN_PSUBD,
  IX86_BUILTIN_PSUBQ,
  IX86_BUILTIN_PSUBSB,
  IX86_BUILTIN_PSUBSW,
  IX86_BUILTIN_PSUBUSB,
  IX86_BUILTIN_PSUBUSW,

  IX86_BUILTIN_PAND,
  IX86_BUILTIN_PANDN,
  IX86_BUILTIN_POR,
  IX86_BUILTIN_PXOR,

  IX86_BUILTIN_PAVGB,
  IX86_BUILTIN_PAVGW,

  IX86_BUILTIN_PCMPEQB,
  IX86_BUILTIN_PCMPEQW,
  IX86_BUILTIN_PCMPEQD,
  IX86_BUILTIN_PCMPGTB,
  IX86_BUILTIN_PCMPGTW,
  IX86_BUILTIN_PCMPGTD,

  IX86_BUILTIN_PMADDWD,

  IX86_BUILTIN_PMAXSW,
  IX86_BUILTIN_PMAXUB,
  IX86_BUILTIN_PMINSW,
  IX86_BUILTIN_PMINUB,

  IX86_BUILTIN_PMULHUW,
  IX86_BUILTIN_PMULHW,
  IX86_BUILTIN_PMULLW,

  IX86_BUILTIN_PSADBW,
  IX86_BUILTIN_PSHUFW,

  IX86_BUILTIN_PSLLW,
  IX86_BUILTIN_PSLLD,
  IX86_BUILTIN_PSLLQ,
  IX86_BUILTIN_PSRAW,
  IX86_BUILTIN_PSRAD,
  IX86_BUILTIN_PSRLW,
  IX86_BUILTIN_PSRLD,
  IX86_BUILTIN_PSRLQ,
  IX86_BUILTIN_PSLLWI,
  IX86_BUILTIN_PSLLDI,
  IX86_BUILTIN_PSLLQI,
  IX86_BUILTIN_PSRAWI,
  IX86_BUILTIN_PSRADI,
  IX86_BUILTIN_PSRLWI,
  IX86_BUILTIN_PSRLDI,
  IX86_BUILTIN_PSRLQI,

  IX86_BUILTIN_PUNPCKHBW,
  IX86_BUILTIN_PUNPCKHWD,
  IX86_BUILTIN_PUNPCKHDQ,
  IX86_BUILTIN_PUNPCKLBW,
  IX86_BUILTIN_PUNPCKLWD,
  IX86_BUILTIN_PUNPCKLDQ,

  IX86_BUILTIN_SHUFPS,

  IX86_BUILTIN_RCPPS,
  IX86_BUILTIN_RCPSS,
  IX86_BUILTIN_RSQRTPS,
  IX86_BUILTIN_RSQRTSS,
  IX86_BUILTIN_SQRTPS,
  IX86_BUILTIN_SQRTSS,

  IX86_BUILTIN_UNPCKHPS,
  IX86_BUILTIN_UNPCKLPS,

  IX86_BUILTIN_ANDPS,
  IX86_BUILTIN_ANDNPS,
  IX86_BUILTIN_ORPS,
  IX86_BUILTIN_XORPS,

  IX86_BUILTIN_EMMS,
  IX86_BUILTIN_LDMXCSR,
  IX86_BUILTIN_STMXCSR,
  IX86_BUILTIN_SFENCE,

  /* 3DNow! Original */
  IX86_BUILTIN_FEMMS,
  IX86_BUILTIN_PAVGUSB,
  IX86_BUILTIN_PF2ID,
  IX86_BUILTIN_PFACC,
  IX86_BUILTIN_PFADD,
  IX86_BUILTIN_PFCMPEQ,
  IX86_BUILTIN_PFCMPGE,
  IX86_BUILTIN_PFCMPGT,
  IX86_BUILTIN_PFMAX,
  IX86_BUILTIN_PFMIN,
  IX86_BUILTIN_PFMUL,
  IX86_BUILTIN_PFRCP,
  IX86_BUILTIN_PFRCPIT1,
  IX86_BUILTIN_PFRCPIT2,
  IX86_BUILTIN_PFRSQIT1,
  IX86_BUILTIN_PFRSQRT,
  IX86_BUILTIN_PFSUB,
  IX86_BUILTIN_PFSUBR,
  IX86_BUILTIN_PI2FD,
  IX86_BUILTIN_PMULHRW,

  /* 3DNow! Athlon Extensions */
  IX86_BUILTIN_PF2IW,
  IX86_BUILTIN_PFNACC,
  IX86_BUILTIN_PFPNACC,
  IX86_BUILTIN_PI2FW,
  IX86_BUILTIN_PSWAPDSI,
  IX86_BUILTIN_PSWAPDSF,

  /* SSE2 */
  IX86_BUILTIN_ADDPD,
  IX86_BUILTIN_ADDSD,
  IX86_BUILTIN_DIVPD,
  IX86_BUILTIN_DIVSD,
  IX86_BUILTIN_MULPD,
  IX86_BUILTIN_MULSD,
  IX86_BUILTIN_SUBPD,
  IX86_BUILTIN_SUBSD,

  IX86_BUILTIN_CMPEQPD,
  IX86_BUILTIN_CMPLTPD,
  IX86_BUILTIN_CMPLEPD,
  IX86_BUILTIN_CMPGTPD,
  IX86_BUILTIN_CMPGEPD,
  IX86_BUILTIN_CMPNEQPD,
  IX86_BUILTIN_CMPNLTPD,
  IX86_BUILTIN_CMPNLEPD,
  IX86_BUILTIN_CMPNGTPD,
  IX86_BUILTIN_CMPNGEPD,
  IX86_BUILTIN_CMPORDPD,
  IX86_BUILTIN_CMPUNORDPD,
  IX86_BUILTIN_CMPNEPD,
  IX86_BUILTIN_CMPEQSD,
  IX86_BUILTIN_CMPLTSD,
  IX86_BUILTIN_CMPLESD,
  IX86_BUILTIN_CMPNEQSD,
  IX86_BUILTIN_CMPNLTSD,
  IX86_BUILTIN_CMPNLESD,
  IX86_BUILTIN_CMPORDSD,
  IX86_BUILTIN_CMPUNORDSD,
  IX86_BUILTIN_CMPNESD,

  IX86_BUILTIN_COMIEQSD,
  IX86_BUILTIN_COMILTSD,
  IX86_BUILTIN_COMILESD,
  IX86_BUILTIN_COMIGTSD,
  IX86_BUILTIN_COMIGESD,
  IX86_BUILTIN_COMINEQSD,
  IX86_BUILTIN_UCOMIEQSD,
  IX86_BUILTIN_UCOMILTSD,
  IX86_BUILTIN_UCOMILESD,
  IX86_BUILTIN_UCOMIGTSD,
  IX86_BUILTIN_UCOMIGESD,
  IX86_BUILTIN_UCOMINEQSD,

  IX86_BUILTIN_MAXPD,
  IX86_BUILTIN_MAXSD,
  IX86_BUILTIN_MINPD,
  IX86_BUILTIN_MINSD,

  IX86_BUILTIN_ANDPD,
  IX86_BUILTIN_ANDNPD,
  IX86_BUILTIN_ORPD,
  IX86_BUILTIN_XORPD,

  IX86_BUILTIN_SQRTPD,
  IX86_BUILTIN_SQRTSD,

  IX86_BUILTIN_UNPCKHPD,
  IX86_BUILTIN_UNPCKLPD,

  IX86_BUILTIN_SHUFPD,

  IX86_BUILTIN_LOADUPD,
  IX86_BUILTIN_STOREUPD,
  IX86_BUILTIN_MOVSD,

  IX86_BUILTIN_LOADHPD,
  IX86_BUILTIN_LOADLPD,

  IX86_BUILTIN_CVTDQ2PD,
  IX86_BUILTIN_CVTDQ2PS,

  IX86_BUILTIN_CVTPD2DQ,
  IX86_BUILTIN_CVTPD2PI,
  IX86_BUILTIN_CVTPD2PS,
  IX86_BUILTIN_CVTTPD2DQ,
  IX86_BUILTIN_CVTTPD2PI,

  IX86_BUILTIN_CVTPI2PD,
  IX86_BUILTIN_CVTSI2SD,
  IX86_BUILTIN_CVTSI642SD,

  IX86_BUILTIN_CVTSD2SI,
  IX86_BUILTIN_CVTSD2SI64,
  IX86_BUILTIN_CVTSD2SS,
  IX86_BUILTIN_CVTSS2SD,
  IX86_BUILTIN_CVTTSD2SI,
  IX86_BUILTIN_CVTTSD2SI64,

  IX86_BUILTIN_CVTPS2DQ,
  IX86_BUILTIN_CVTPS2PD,
  IX86_BUILTIN_CVTTPS2DQ,

  IX86_BUILTIN_MOVNTI,
  IX86_BUILTIN_MOVNTPD,
  IX86_BUILTIN_MOVNTDQ,

  /* SSE2 MMX */
  IX86_BUILTIN_MASKMOVDQU,
  IX86_BUILTIN_MOVMSKPD,
  IX86_BUILTIN_PMOVMSKB128,

  IX86_BUILTIN_PACKSSWB128,
  IX86_BUILTIN_PACKSSDW128,
  IX86_BUILTIN_PACKUSWB128,

  IX86_BUILTIN_PADDB128,
  IX86_BUILTIN_PADDW128,
  IX86_BUILTIN_PADDD128,
  IX86_BUILTIN_PADDQ128,
  IX86_BUILTIN_PADDSB128,
  IX86_BUILTIN_PADDSW128,
  IX86_BUILTIN_PADDUSB128,
  IX86_BUILTIN_PADDUSW128,
  IX86_BUILTIN_PSUBB128,
  IX86_BUILTIN_PSUBW128,
  IX86_BUILTIN_PSUBD128,
  IX86_BUILTIN_PSUBQ128,
  IX86_BUILTIN_PSUBSB128,
  IX86_BUILTIN_PSUBSW128,
  IX86_BUILTIN_PSUBUSB128,
  IX86_BUILTIN_PSUBUSW128,

  IX86_BUILTIN_PAND128,
  IX86_BUILTIN_PANDN128,
  IX86_BUILTIN_POR128,
  IX86_BUILTIN_PXOR128,

  IX86_BUILTIN_PAVGB128,
  IX86_BUILTIN_PAVGW128,

  IX86_BUILTIN_PCMPEQB128,
  IX86_BUILTIN_PCMPEQW128,
  IX86_BUILTIN_PCMPEQD128,
  IX86_BUILTIN_PCMPGTB128,
  IX86_BUILTIN_PCMPGTW128,
  IX86_BUILTIN_PCMPGTD128,

  IX86_BUILTIN_PMADDWD128,

  IX86_BUILTIN_PMAXSW128,
  IX86_BUILTIN_PMAXUB128,
  IX86_BUILTIN_PMINSW128,
  IX86_BUILTIN_PMINUB128,

  IX86_BUILTIN_PMULUDQ,
  IX86_BUILTIN_PMULUDQ128,
  IX86_BUILTIN_PMULHUW128,
  IX86_BUILTIN_PMULHW128,
  IX86_BUILTIN_PMULLW128,

  IX86_BUILTIN_PSADBW128,
  IX86_BUILTIN_PSHUFHW,
  IX86_BUILTIN_PSHUFLW,
  IX86_BUILTIN_PSHUFD,

  IX86_BUILTIN_PSLLW128,
  IX86_BUILTIN_PSLLD128,
  IX86_BUILTIN_PSLLQ128,
  IX86_BUILTIN_PSRAW128,
  IX86_BUILTIN_PSRAD128,
  IX86_BUILTIN_PSRLW128,
  IX86_BUILTIN_PSRLD128,
  IX86_BUILTIN_PSRLQ128,
  IX86_BUILTIN_PSLLDQI128,
  IX86_BUILTIN_PSLLWI128,
  IX86_BUILTIN_PSLLDI128,
  IX86_BUILTIN_PSLLQI128,
  IX86_BUILTIN_PSRAWI128,
  IX86_BUILTIN_PSRADI128,
  IX86_BUILTIN_PSRLDQI128,
  IX86_BUILTIN_PSRLWI128,
  IX86_BUILTIN_PSRLDI128,
  IX86_BUILTIN_PSRLQI128,

  IX86_BUILTIN_PUNPCKHBW128,
  IX86_BUILTIN_PUNPCKHWD128,
  IX86_BUILTIN_PUNPCKHDQ128,
  IX86_BUILTIN_PUNPCKHQDQ128,
  IX86_BUILTIN_PUNPCKLBW128,
  IX86_BUILTIN_PUNPCKLWD128,
  IX86_BUILTIN_PUNPCKLDQ128,
  IX86_BUILTIN_PUNPCKLQDQ128,

  IX86_BUILTIN_CLFLUSH,
  IX86_BUILTIN_MFENCE,
  IX86_BUILTIN_LFENCE,

  /* Prescott New Instructions.  */
  IX86_BUILTIN_ADDSUBPS,
  IX86_BUILTIN_HADDPS,
  IX86_BUILTIN_HSUBPS,
  IX86_BUILTIN_MOVSHDUP,
  IX86_BUILTIN_MOVSLDUP,
  IX86_BUILTIN_ADDSUBPD,
  IX86_BUILTIN_HADDPD,
  IX86_BUILTIN_HSUBPD,
  IX86_BUILTIN_LDDQU,

  IX86_BUILTIN_MONITOR,
  IX86_BUILTIN_MWAIT,

  IX86_BUILTIN_VEC_INIT_V2SI,
  IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI,
  IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI,
  IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI,
  IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V2SI,
  IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_SET_V8HI,
  IX86_BUILTIN_VEC_SET_V4HI,

  IX86_BUILTIN_MAX
};

#define def_builtin(MASK, NAME, TYPE, CODE)                             \
do {                                                                    \
  if ((MASK) & target_flags                                             \
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))                      \
    lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,   \
                                 NULL, NULL_TREE);                      \
} while (0)
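
/* Example added for exposition (hypothetical call, but the names match the
   tables below): one registration via def_builtin as it would appear inside
   ix86_init_mmx_sse_builtins.  The builtin is created only when the MASK_SSE
   bit is set in target_flags, i.e. when -msse is enabled.  */
#if 0
  def_builtin (MASK_SSE, "__builtin_ia32_addps",
               v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);
#endif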

/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap the comparison operands in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS      1
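
/* Sketch added for exposition: how an expander can honor this flag.  SSE
   has no native "greater than" compare, so e.g. __builtin_ia32_cmpgtps is
   listed in bdesc_2arg below with comparison code LT plus
   BUILTIN_DESC_SWAP_OPERANDS, and a > b is emitted as b < a.  The snippet
   is illustrative, not the exact expander code from this file.  */
#if 0
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op0;
      op0 = op1;
      op1 = tmp;
    }
#endif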

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};

static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
static const struct builtin_description bdesc_2arg[] =
{
  /* SSE */
  { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
  { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
  { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
  { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },

  { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
  { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },

  { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
  { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
  { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },

  /* MMX */
  { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },

  /* Special.  */
  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },

  /* SSE2 */
  { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },

  { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },

  /* SSE2 MMX */
  { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },

  { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
  { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
  { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },

  /* SSE3 MMX */
  { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
};
static const struct builtin_description bdesc_1arg[] =
{
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },

  /* SSE3 */
  { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
};
static void
ix86_init_builtins (void)
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}

/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  size_t i;

  tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
  tree V2DI_type_node
    = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
  tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
  tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
  tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
  tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);

  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pcchar_type_node = build_pointer_type (
                             build_type_variant (char_type_node, 1, 0));
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pcfloat_type_node = build_pointer_type (
                             build_type_variant (float_type_node, 1, 0));
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);

  /* Comparisons.  */
  tree int_ftype_v4sf_v4sf
    = build_function_type_list (integer_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v4si_ftype_v4sf_v4sf
    = build_function_type_list (V4SI_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type_list (integer_type_node,
                                V4SF_type_node, NULL_TREE);
  tree int64_ftype_v4sf
    = build_function_type_list (long_long_integer_type_node,
                                V4SF_type_node, NULL_TREE);
  tree int_ftype_v8qi
    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, integer_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int64
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, long_long_integer_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v4sf_v2si
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V2SI_type_node, NULL_TREE);

  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type_list (V8QI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v2si_v2si
    = build_function_type_list (V4HI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V4SF_type_node,
                                integer_type_node, NULL_TREE);
  tree v2si_ftype_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_di
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree v2si_ftype_v2si_di
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree void_ftype_void
    = build_function_type (void_type_node, void_list_node);
  tree void_ftype_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
  tree void_ftype_unsigned_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node,
                                unsigned_type_node, NULL_TREE);
  tree void_ftype_pcvoid_unsigned_unsigned
    = build_function_type_list (void_type_node, const_ptr_type_node,
                                unsigned_type_node, unsigned_type_node,
                                NULL_TREE);
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, void_list_node);
  tree v2si_ftype_v4sf
    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
  /* Loads/stores.  */
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type_list (void_type_node,
                                V8QI_type_node, V8QI_type_node,
                                pchar_type_node, NULL_TREE);
  tree v4sf_ftype_pcfloat
    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v4sf
    = build_function_type_list (void_type_node,
                                pv2si_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pfloat_v4sf
    = build_function_type_list (void_type_node,
                                pfloat_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pdi_di
    = build_function_type_list (void_type_node,
                                pdi_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree void_ftype_pv2di_v2di
    = build_function_type_list (void_type_node,
                                pv2di_type_node, V2DI_type_node, NULL_TREE);
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type_list (V8QI_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree di_ftype_di_di
    = build_function_type_list (long_long_unsigned_type_node,
                                long_long_unsigned_type_node,
                                long_long_unsigned_type_node, NULL_TREE);

  tree v2si_ftype_v2sf
    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2si
    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf
    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type_list (V2SF_type_node,
                                V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2si_ftype_v2sf_v2sf
    = build_function_type_list (V2SI_type_node,
                                V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree pint_type_node    = build_pointer_type (integer_type_node);
  tree pdouble_type_node = build_pointer_type (double_type_node);
  tree pcdouble_type_node = build_pointer_type (
                                build_type_variant (double_type_node, 1, 0));
  tree int_ftype_v2df_v2df
    = build_function_type_list (integer_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);

  tree ti_ftype_ti_ti
    = build_function_type_list (intTI_type_node,
                                intTI_type_node, intTI_type_node, NULL_TREE);
  tree void_ftype_pcvoid
    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
  tree v4sf_ftype_v4si
    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v4sf
    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v4si
    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v2df
    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
  tree v2si_ftype_v2df
    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v2df
    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2si
    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
  tree v2df_ftype_v4sf
    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree int_ftype_v2df
    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
  tree int64_ftype_v2df
    = build_function_type_list (long_long_integer_type_node,
                                V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int64
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, long_long_integer_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v4sf_v2df
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v4sf
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df_int
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node,
                                integer_type_node,
                                NULL_TREE);
  tree v2df_ftype_v2df_pcdouble
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree void_ftype_pdouble_v2df
    = build_function_type_list (void_type_node,
                                pdouble_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pint_int
    = build_function_type_list (void_type_node,
                                pint_type_node, integer_type_node, NULL_TREE);
  tree void_ftype_v16qi_v16qi_pchar
    = build_function_type_list (void_type_node,
                                V16QI_type_node, V16QI_type_node,
                                pchar_type_node, NULL_TREE);
  tree v2df_ftype_pcdouble
    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi
    = build_function_type_list (V16QI_type_node,
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v4si
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, V4SI_type_node, NULL_TREE);
14533
  tree v2di_ftype_v2di_v2di
14534
    = build_function_type_list (V2DI_type_node,
14535
                                V2DI_type_node, V2DI_type_node, NULL_TREE);
14536
  tree v2di_ftype_v2df_v2df
14537
    = build_function_type_list (V2DI_type_node,
14538
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
14539
  tree v2df_ftype_v2df
14540
    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
14541
  tree v2di_ftype_v2di_int
14542
    = build_function_type_list (V2DI_type_node,
14543
                                V2DI_type_node, integer_type_node, NULL_TREE);
14544
  tree v4si_ftype_v4si_int
14545
    = build_function_type_list (V4SI_type_node,
14546
                                V4SI_type_node, integer_type_node, NULL_TREE);
14547
  tree v8hi_ftype_v8hi_int
14548
    = build_function_type_list (V8HI_type_node,
14549
                                V8HI_type_node, integer_type_node, NULL_TREE);
14550
  tree v8hi_ftype_v8hi_v2di
14551
    = build_function_type_list (V8HI_type_node,
14552
                                V8HI_type_node, V2DI_type_node, NULL_TREE);
14553
  tree v4si_ftype_v4si_v2di
14554
    = build_function_type_list (V4SI_type_node,
14555
                                V4SI_type_node, V2DI_type_node, NULL_TREE);
14556
  tree v4si_ftype_v8hi_v8hi
14557
    = build_function_type_list (V4SI_type_node,
14558
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
14559
  tree di_ftype_v8qi_v8qi
14560
    = build_function_type_list (long_long_unsigned_type_node,
14561
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
14562
  tree di_ftype_v2si_v2si
14563
    = build_function_type_list (long_long_unsigned_type_node,
14564
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
14565
  tree v2di_ftype_v16qi_v16qi
14566
    = build_function_type_list (V2DI_type_node,
14567
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
14568
  tree v2di_ftype_v4si_v4si
14569
    = build_function_type_list (V2DI_type_node,
14570
                                V4SI_type_node, V4SI_type_node, NULL_TREE);
14571
  tree int_ftype_v16qi
14572
    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
14573
  tree v16qi_ftype_pcchar
14574
    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
14575
  tree void_ftype_pchar_v16qi
14576
    = build_function_type_list (void_type_node,
14577
                                pchar_type_node, V16QI_type_node, NULL_TREE);
14578
 
14579
  tree float80_type;
14580
  tree float128_type;
14581
  tree ftype;
14582
 
14583
  /* The __float80 type.  */
14584
  if (TYPE_MODE (long_double_type_node) == XFmode)
14585
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
14586
                                               "__float80");
14587
  else
14588
    {
14589
      /* The __float80 type.  */
14590
      float80_type = make_node (REAL_TYPE);
14591
      TYPE_PRECISION (float80_type) = 80;
14592
      layout_type (float80_type);
14593
      (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
14594
    }
14595
 
14596
  if (TARGET_64BIT)
14597
    {
14598
      float128_type = make_node (REAL_TYPE);
14599
      TYPE_PRECISION (float128_type) = 128;
14600
      layout_type (float128_type);
14601
      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
14602
    }
14603
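
/* Editorial sketch (not part of the original source): once registered,
   the extended float types above are directly usable from C on x86, e.g.

       __float80 e = 1.0L;       (80-bit extended precision)
       __float128 q;             (registered only when TARGET_64BIT)
*/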
 
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
         mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
        continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
        {
        case V16QImode:
          type = v16qi_ftype_v16qi_v16qi;
          break;
        case V8HImode:
          type = v8hi_ftype_v8hi_v8hi;
          break;
        case V4SImode:
          type = v4si_ftype_v4si_v4si;
          break;
        case V2DImode:
          type = v2di_ftype_v2di_v2di;
          break;
        case V2DFmode:
          type = v2df_ftype_v2df_v2df;
          break;
        case TImode:
          type = ti_ftype_ti_ti;
          break;
        case V4SFmode:
          type = v4sf_ftype_v4sf_v4sf;
          break;
        case V8QImode:
          type = v8qi_ftype_v8qi_v8qi;
          break;
        case V4HImode:
          type = v4hi_ftype_v4hi_v4hi;
          break;
        case V2SImode:
          type = v2si_ftype_v2si_v2si;
          break;
        case DImode:
          type = di_ftype_di_di;
          break;

        default:
          gcc_unreachable ();
        }

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_sse_maskcmpv4sf3
          || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
        type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_sse2_maskcmpv2df3
          || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
        type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }
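
/* Editorial sketch: the two-operand builtins defined by the loop above
   are what the intrinsic headers wrap; e.g. <xmmintrin.h> roughly
   implements _mm_add_ps as:

       static __inline __m128
       _mm_add_ps (__m128 __A, __m128 __B)
       {
         return (__m128) __builtin_ia32_addps ((__v4sf) __A, (__v4sf) __B);
       }
*/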
 
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == MASK_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
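
/* Editorial sketch: the comi/ucomi builtins return an int condition
   flag and back the scalar comparison intrinsics; e.g. <xmmintrin.h>
   roughly defines:

       static __inline int
       _mm_comieq_ss (__m128 __A, __m128 __B)
       {
         return __builtin_ia32_comieq ((__v4sf) __A, (__v4sf) __B);
       }
*/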
 
  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
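
/* Editorial sketch: the unaligned load/store builtins above are the
   bodies of _mm_loadu_ps / _mm_storeu_ps in <xmmintrin.h>, roughly:

       static __inline __m128
       _mm_loadu_ps (float const *__P)
       {
         return (__m128) __builtin_ia32_loadups (__P);
       }
*/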
 
  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);

  /* SSE2 */
  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);

  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);

  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);

  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);

  /* Prescott New Instructions.  */
  def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
               void_ftype_pcvoid_unsigned_unsigned,
               IX86_BUILTIN_MONITOR);
  def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
               void_ftype_unsigned_unsigned,
               IX86_BUILTIN_MWAIT);
  def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
               v4sf_ftype_v4sf,
               IX86_BUILTIN_MOVSHDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
               v4sf_ftype_v4sf,
               IX86_BUILTIN_MOVSLDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
               v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
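
/* Editorial sketch: <pmmintrin.h> exposes the SSE3 monitor/mwait
   builtins above as intrinsics, roughly:

       static __inline void
       _mm_monitor (void const *__P, unsigned int __E, unsigned int __H)
       {
         __builtin_ia32_monitor (__P, __E, __H);
       }

       static __inline void
       _mm_mwait (unsigned int __E, unsigned int __H)
       {
         __builtin_ia32_mwait (__E, __H);
       }
*/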
 
  /* Access to the vec_init patterns.  */
  ftype = build_function_type_list (V2SI_type_node, integer_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
               ftype, IX86_BUILTIN_VEC_INIT_V2SI);

  ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
                                    short_integer_type_node,
                                    short_integer_type_node,
                                    short_integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
               ftype, IX86_BUILTIN_VEC_INIT_V4HI);

  ftype = build_function_type_list (V8QI_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
               ftype, IX86_BUILTIN_VEC_INIT_V8QI);
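
/* Editorial sketch: <mmintrin.h> constructs __m64 values through the
   vec_init builtins above, roughly:

       static __inline __m64
       _mm_set_pi32 (int __i1, int __i0)
       {
         return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
       }
*/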
 
  /* Access to the vec_extract patterns.  */
  ftype = build_function_type_list (double_type_node, V2DF_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
               ftype, IX86_BUILTIN_VEC_EXT_V2DF);

  ftype = build_function_type_list (long_long_integer_type_node,
                                    V2DI_type_node, integer_type_node,
                                    NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
               ftype, IX86_BUILTIN_VEC_EXT_V2DI);

  ftype = build_function_type_list (float_type_node, V4SF_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
               ftype, IX86_BUILTIN_VEC_EXT_V4SF);

  ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
               ftype, IX86_BUILTIN_VEC_EXT_V4SI);

  ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
               ftype, IX86_BUILTIN_VEC_EXT_V8HI);

  ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
               ftype, IX86_BUILTIN_VEC_EXT_V4HI);

  ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
               ftype, IX86_BUILTIN_VEC_EXT_V2SI);

  /* Access to the vec_set patterns.  */
  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
                                    intHI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
               ftype, IX86_BUILTIN_VEC_SET_V8HI);

  ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
                                    intHI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
               ftype, IX86_BUILTIN_VEC_SET_V4HI);
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}

/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat, xops[3];
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* The insn must want input operands in the same modes as the
     result.  */
  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
              && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* ??? Using ix86_fixup_binary_operands is problematic when
     we've got mismatched modes.  Fake it.  */

  xops[0] = target;
  xops[1] = op0;
  xops[2] = op1;

  if (tmode == mode0 && tmode == mode1)
    {
      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
      op0 = xops[1];
      op1 = xops[2];
    }
  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
    {
      op0 = force_reg (mode0, op0);
      op1 = force_reg (mode1, op1);
      target = gen_reg_rtx (tmode);
    }

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
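
/* Editorial sketch: for a call such as __builtin_ia32_addps (a, b),
   this helper emits a single two-operand insn whose RTL has roughly
   the following shape (register numbers invented for illustration):

       (set (reg:V4SF 60)
            (plus:V4SF (reg:V4SF 58)
                       (reg:V4SF 59)))
*/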
 
/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (enum insn_code icode, tree arglist)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}

/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
                          rtx target, int do_load)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
        op0 = safe_vector_operand (op0, mode0);

      if ((optimize && !register_operand (op0, mode0))
          || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
                         rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
                      rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          SET_DEST (pat),
                                          const0_rtx)));

  return SUBREG_REG (target);
}

/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static int
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of  (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that give us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));

  for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
    {
      rtx x = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  gcc_assert (arglist == NULL);

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}
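
/* Editorial sketch: for non-MMX vector modes the language-level form
   mentioned above works directly, e.g.

       typedef int __v4si __attribute__ ((vector_size (16)));
       __v4si x = (__v4si) { 1, 2, 3, 4 };

   For MMX modes, mmintrin.h instead goes through the
   __builtin_ia32_vec_init_* builtins defined earlier.  */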
 
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree arglist, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));

  op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}
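
/* Editorial sketch: <xmmintrin.h> reaches this expander through the
   vec_ext builtins, e.g. roughly:

       static __inline int
       _mm_extract_pi16 (__m64 const __A, int const __N)
       {
         return __builtin_ia32_vec_ext_v4hi ((__v4hi) __A, __N);
       }

   The selector argument must fold to an integer constant in range;
   otherwise get_element_number above reports an error.  */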
 
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree arglist)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
  op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  ix86_expand_vector_set (true, op0, op1, elt);

  return op0;
}

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                     enum machine_mode mode ATTRIBUTE_UNUSED,
                     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_mmx_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sse_sfence ());
      return 0;

    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
               ? CODE_FOR_mmx_maskmovq
               : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      op0 = force_reg (Pmode, op0);
      op0 = gen_rtx_MEM (mode1, op0);

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
               : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
               : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
               : CODE_FOR_sse2_loadlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = force_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (optimize || target == 0
          || GET_MODE (target) != tmode
          || !register_operand (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
               : CODE_FOR_sse_storelps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      op1 = force_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return const0_rtx;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, SLOT_TEMP);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, SLOT_TEMP);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
               ? CODE_FOR_sse_shufps
               : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if ((optimize && !register_operand (op1, mode1))
          || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (optimize || target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
               : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
               : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
               : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSLLDQI128:
    case IX86_BUILTIN_PSRLDQI128:
      icode = (  fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
               : CODE_FOR_sse2_lshrti3);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        {
          op0 = copy_to_reg (op0);
          op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
        }
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          error ("shift must be an immediate");
          return const0_rtx;
        }
      target = gen_reg_rtx (V2DImode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_mmx_femms ());
      return NULL_RTX;

    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);

    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);

    case IX86_BUILTIN_MFENCE:
        emit_insn (gen_sse2_mfence ());
        return 0;
    case IX86_BUILTIN_LFENCE:
        emit_insn (gen_sse2_lfence ());
        return 0;

    case IX86_BUILTIN_CLFLUSH:
        arg0 = TREE_VALUE (arglist);
        op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
        icode = CODE_FOR_sse2_clflush;
        if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
            op0 = copy_to_mode_reg (Pmode, op0);

        emit_insn (gen_sse2_clflush (op0));
        return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    case IX86_BUILTIN_LOADDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
    case IX86_BUILTIN_STOREDQU:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);

    case IX86_BUILTIN_MONITOR:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      if (!REG_P (op0))
        op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
        op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
        op2 = copy_to_mode_reg (SImode, op2);
      emit_insn (gen_sse3_monitor (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      if (!REG_P (op0))
        op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
        op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;

    case IX86_BUILTIN_LDDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
15725
                                       target, 1);
15726
 
15727
    case IX86_BUILTIN_VEC_INIT_V2SI:
15728
    case IX86_BUILTIN_VEC_INIT_V4HI:
15729
    case IX86_BUILTIN_VEC_INIT_V8QI:
15730
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
15731
 
15732
    case IX86_BUILTIN_VEC_EXT_V2DF:
15733
    case IX86_BUILTIN_VEC_EXT_V2DI:
15734
    case IX86_BUILTIN_VEC_EXT_V4SF:
15735
    case IX86_BUILTIN_VEC_EXT_V4SI:
15736
    case IX86_BUILTIN_VEC_EXT_V8HI:
15737
    case IX86_BUILTIN_VEC_EXT_V2SI:
15738
    case IX86_BUILTIN_VEC_EXT_V4HI:
15739
      return ix86_expand_vec_ext_builtin (arglist, target);
15740
 
15741
    case IX86_BUILTIN_VEC_SET_V8HI:
15742
    case IX86_BUILTIN_VEC_SET_V4HI:
15743
      return ix86_expand_vec_set_builtin (arglist);
15744
 
15745
    default:
15746
      break;
15747
    }
15748
 
15749
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15750
    if (d->code == fcode)
15751
      {
15752
        /* Compares are treated specially.  */
15753
        if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15754
            || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
15755
            || d->icode == CODE_FOR_sse2_maskcmpv2df3
15756
            || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15757
          return ix86_expand_sse_compare (d, arglist, target);
15758
 
15759
        return ix86_expand_binop_builtin (d->icode, arglist, target);
15760
      }
15761
 
15762
  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15763
    if (d->code == fcode)
15764
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
15765
 
15766
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15767
    if (d->code == fcode)
15768
      return ix86_expand_sse_comi (d, arglist, target);
15769
 
15770
  gcc_unreachable ();
15771
}
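
/* Illustrative note (added comment, not original source): user code
   reaches this expander through the __builtin_ia32_* entry points,
   for example

     void flush (void *p) { __builtin_ia32_clflush (p); }

   which arrives here with fcode == IX86_BUILTIN_CLFLUSH and ARGLIST
   holding the single pointer argument.  */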

/* Store OPERAND to memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  gcc_assert (reload_completed);
  if (TARGET_RED_ZONE)
    {
      result = gen_rtx_MEM (mode,
                            gen_rtx_PLUS (Pmode,
                                          stack_pointer_rtx,
                                          GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (!TARGET_RED_ZONE && TARGET_64BIT)
    {
      switch (mode)
        {
        case HImode:
        case SImode:
          operand = gen_lowpart (DImode, operand);
          /* FALLTHRU */
        case DImode:
          emit_insn (
                      gen_rtx_SET (VOIDmode,
                                   gen_rtx_MEM (DImode,
                                                gen_rtx_PRE_DEC (DImode,
                                                        stack_pointer_rtx)),
                                   operand));
          break;
        default:
          gcc_unreachable ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
        {
        case DImode:
          {
            rtx operands[2];
            split_di (&operand, 1, operands, operands + 1);
            emit_insn (
                        gen_rtx_SET (VOIDmode,
                                     gen_rtx_MEM (SImode,
                                                  gen_rtx_PRE_DEC (Pmode,
                                                        stack_pointer_rtx)),
                                     operands[1]));
            emit_insn (
                        gen_rtx_SET (VOIDmode,
                                     gen_rtx_MEM (SImode,
                                                  gen_rtx_PRE_DEC (Pmode,
                                                        stack_pointer_rtx)),
                                     operands[0]));
          }
          break;
        case HImode:
          /* Store HImode values as SImode.  */
          operand = gen_lowpart (SImode, operand);
          /* FALLTHRU */
        case SImode:
          emit_insn (
                      gen_rtx_SET (VOIDmode,
                                   gen_rtx_MEM (GET_MODE (operand),
                                                gen_rtx_PRE_DEC (SImode,
                                                        stack_pointer_rtx)),
                                   operand));
          break;
        default:
          gcc_unreachable ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
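
/* Illustrative sketch (added comment, not original source): on 32-bit
   targets without a red zone, forcing a DImode OPERAND to memory emits
   the equivalent of

     pushl %high-part
     pushl %low-part

   and returns (mem:DI (reg:SI sp)), i.e. a reference to the freshly
   pushed slot, with the low word at the lowest address.  */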

/* Free the operand from memory.  */
void
ix86_free_from_memory (enum machine_mode mode)
{
  if (!TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
        size = 8;
      else
        size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
         to a pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                            GEN_INT (size))));
    }
}
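
/* Illustrative sketch (assumption, not original source): the two
   helpers above are intended to be used as a pair around insns that
   need a post-reload stack temporary, roughly

     rtx mem = ix86_force_to_memory (DImode, operand);
     ... emit insns that read MEM ...
     ix86_free_from_memory (DImode);

   so the stack pointer is restored once the temporary is dead.  */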

/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (rtx x, enum reg_class class)
{
  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (class == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (GET_MODE (x)))
    return class;

  /* Floating-point constants need more complex checks.  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
        return class;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
         zero above.  We only want to wind up preferring 80387 registers if
         we plan on doing computation with them.  */
      if (TARGET_80387
          && (TARGET_MIX_SSE_I387
              || !(TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (x))))
          && standard_80387_constant_p (x))
        {
          /* Limit class to non-sse.  */
          if (class == FLOAT_SSE_REGS)
            return FLOAT_REGS;
          if (class == FP_TOP_SSE_REGS)
            return FP_TOP_REG;
          if (class == FP_SECOND_SSE_REGS)
            return FP_SECOND_REG;
          if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
            return class;
        }

      return NO_REGS;
    }
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  if (MAYBE_SSE_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int), which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (reg_class_subset_p (class, Q_REGS))
        return class;
      if (reg_class_subset_p (Q_REGS, class))
        return Q_REGS;
      return NO_REGS;
    }

  return class;
}
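
/* Illustrative example (added comment, not original source): reloading
   the CONST_DOUBLE 1.0 into FLOAT_SSE_REGS with x87 math enabled is
   narrowed to FLOAT_REGS above, so fld1 can materialize the constant
   instead of a constant-pool load into an SSE register.  */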

/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is a class
   containing registers from multiple units (SSE, MMX, integer).  We avoid
   this by never combining those units in a single alternative in the
   machine description.  Ensure that this constraint holds to avoid
   unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */

int
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
                              enum machine_mode mode, int strict)
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      gcc_assert (!strict);
      return true;
    }

  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and with
     SSE2 between mmx/sse.  But by saying we need secondary memory we
     discourage the register allocator from using the mmx registers unless
     needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
        return true;

      /* If the target says that inter-unit moves are more expensive
         than moving through memory, then don't generate them.  */
      if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
        return true;

      /* Between SSE and general, we have moves no larger than word size.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
        return true;

      /* ??? For the cost of one register reformat penalty, we could use
         the same instructions to move SFmode and DFmode data, but the
         relevant move patterns don't support those alternatives.  */
      if (mode == SFmode || mode == DFmode)
        return true;
    }

  return false;
}
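
/* Illustrative example (added comment, not original source): on 32-bit
   targets, moving a DImode value between SSE_REGS and GENERAL_REGS
   reports secondary memory, since GET_MODE_SIZE (DImode) exceeds
   UNITS_PER_WORD there; the move is then staged through a stack slot.  */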

/* Return true if the registers in CLASS cannot represent the change from
   modes FROM to TO.  */

bool
ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
                               enum reg_class class)
{
  if (from == to)
    return false;

  /* x87 registers can't do subreg at all, as all values are reformatted
     to extended precision.  */
  if (MAYBE_FLOAT_CLASS_P (class))
    return true;

  if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
    {
      /* Vector registers do not support QI or HImode loads.  If we don't
         disallow a change to these modes, reload will assume it's ok to
         drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
         the vec_dupv4hi pattern.  */
      if (GET_MODE_SIZE (from) < 4)
        return true;

      /* Vector registers do not support subreg with nonzero offsets, which
         are otherwise valid for integer registers.  Since we can't see
         whether we have a nonzero offset from here, prohibit all
         nonparadoxical subregs changing size.  */
      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
        return true;
    }

  return false;
}
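
/* Illustrative example (added comment, not original source): for an SSE
   class, a change from HImode to SImode is rejected because
   GET_MODE_SIZE (HImode) < 4, which keeps reload from simplifying
   (subreg:SI (reg:HI 100) 0) into a plain register reference.  */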

/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */

int
ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
                         enum reg_class class2)
{
  /* In case we require secondary memory, compute the cost of the store
     followed by a load.  In order to avoid bad register allocation choices,
     we need for this to be *at least* as high as the symmetric
     MEMORY_MOVE_COST.  */

  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
                   MEMORY_MOVE_COST (mode, class1, 1));
      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
                   MEMORY_MOVE_COST (mode, class2, 1));

      /* When copying from a general-purpose register we may emit multiple
         stores followed by a single load, causing a memory-size-mismatch
         stall.  Count this as an arbitrarily high cost of 20.  */
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
        cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
         have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
        cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and the integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
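
/* Illustrative example (added comment, not original source): when the
   secondary-memory path is taken, the returned cost is roughly

     1 + max (store, load) for CLASS1 + max (store, load) for CLASS2

   plus the two +20 penalties above when they apply, so such moves are
   strongly discouraged relative to same-unit register moves.  */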
16087
 
16088
/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
16089
 
16090
bool
16091
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
16092
{
16093
  /* Flags and only flags can only hold CCmode values.  */
16094
  if (CC_REGNO_P (regno))
16095
    return GET_MODE_CLASS (mode) == MODE_CC;
16096
  if (GET_MODE_CLASS (mode) == MODE_CC
16097
      || GET_MODE_CLASS (mode) == MODE_RANDOM
16098
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
16099
    return 0;
16100
  if (FP_REGNO_P (regno))
16101
    return VALID_FP_MODE_P (mode);
16102
  if (SSE_REGNO_P (regno))
16103
    {
16104
      /* We implement the move patterns for all vector modes into and
16105
         out of SSE registers, even when no operation instructions
16106
         are available.  */
16107
      return (VALID_SSE_REG_MODE (mode)
16108
              || VALID_SSE2_REG_MODE (mode)
16109
              || VALID_MMX_REG_MODE (mode)
16110
              || VALID_MMX_REG_MODE_3DNOW (mode));
16111
    }
16112
  if (MMX_REGNO_P (regno))
16113
    {
16114
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
16115
         so if the register is available at all, then we can move data of
16116
         the given mode into or out of it.  */
16117
      return (VALID_MMX_REG_MODE (mode)
16118
              || VALID_MMX_REG_MODE_3DNOW (mode));
16119
    }
16120
 
16121
  if (mode == QImode)
16122
    {
16123
      /* Take care for QImode values - they can be in non-QI regs,
16124
         but then they do cause partial register stalls.  */
16125
      if (regno < 4 || TARGET_64BIT)
16126
        return 1;
16127
      if (!TARGET_PARTIAL_REG_STALL)
16128
        return 1;
16129
      return reload_in_progress || reload_completed;
16130
    }
16131
  /* We handle both integer and floats in the general purpose registers.  */
16132
  else if (VALID_INT_MODE_P (mode))
16133
    return 1;
16134
  else if (VALID_FP_MODE_P (mode))
16135
    return 1;
16136
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
16137
     on to use that value in smaller contexts, this can easily force a
16138
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
16139
     supporting DImode, allow it.  */
16140
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
16141
    return 1;
16142
 
16143
  return 0;
16144
}
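
/* Illustrative example (added comment, not original source): on ia32
   with TARGET_PARTIAL_REG_STALL, QImode is rejected for registers
   above regno 3 (e.g. %esi) until reload, since only the first four
   integer registers have byte variants.  */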

/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   tieable integer mode.  */

static bool
ix86_tieable_integer_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case HImode:
    case SImode:
      return true;

    case QImode:
      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;

    case DImode:
      return TARGET_64BIT;

    default:
      return false;
    }
}

/* Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

bool
ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  if (mode1 == mode2)
    return true;

  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any other mode acceptable to SSE registers.  */
  if (GET_MODE_SIZE (mode2) >= 8
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);

  /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);

  return false;
}
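
/* Illustrative examples (added comment, not original source):
   ix86_modes_tieable_p (SFmode, DFmode) and
   ix86_modes_tieable_p (SFmode, XFmode) both hold, so a pseudo used in
   SFmode can reuse a hard register already holding the wider mode.  */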

/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Also model the increased cost of moving QImode registers in
   non-Q_REGS classes.  */
int
ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
{
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
        {
          case SFmode:
            index = 0;
            break;
          case DFmode:
            index = 1;
            break;
          case XFmode:
            index = 2;
            break;
          default:
            return 100;
        }
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
          case 4:
            index = 0;
            break;
          case 8:
            index = 1;
            break;
          case 16:
            index = 2;
            break;
          default:
            return 100;
        }
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
          case 4:
            index = 0;
            break;
          case 8:
            index = 1;
            break;
          default:
            return 100;
        }
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
      case 1:
        if (in)
          return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
                  : ix86_cost->movzbl_load);
        else
          return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
                  : ix86_cost->int_store[0] + 4);
        break;
      case 2:
        return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
      default:
        /* Compute the number of 32bit moves needed.  TFmode is moved as
           XFmode.  */
        if (mode == TFmode)
          mode = XFmode;
        return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
                * (((int) GET_MODE_SIZE (mode)
                    + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}
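
/* Illustrative example (added comment, not original source): storing a
   QImode value held in a non-Q_REGS class costs int_store[0] + 4,
   presumably reflecting that the byte must first reach a
   byte-addressable register before the store can be issued.  */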

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
{
  enum machine_mode mode = GET_MODE (x);

  switch (code)
    {
    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
        *total = 3;
      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
        *total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x)
               && (!TARGET_64BIT
                   || (GET_CODE (x) != LABEL_REF
                       && (GET_CODE (x) != SYMBOL_REF
                           || !SYMBOL_REF_LOCAL_P (x)))))
        *total = 1;
      else
        *total = 0;
      return true;

    case CONST_DOUBLE:
      if (mode == VOIDmode)
        *total = 0;
      else
        switch (standard_80387_constant_p (x))
          {
          case 1: /* 0.0 */
            *total = 1;
            break;
          default: /* Other constants */
            *total = 2;
            break;
          case 0:
          case -1:
            /* Start with (MEM (SYMBOL_REF)), since that's where
               it'll probably end up.  Add a penalty for size.  */
            *total = (COSTS_N_INSNS (1)
                      + (flag_pic != 0 && !TARGET_64BIT)
                      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
            break;
          }
      return true;

    case ZERO_EXTEND:
      /* Zero extension is often completely free on x86_64, so make
         it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
          && GET_MODE (XEXP (x, 0)) == SImode)
        *total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
        *total = COSTS_N_INSNS (ix86_cost->add);
      else
        *total = COSTS_N_INSNS (ix86_cost->movzx);
      return false;

    case SIGN_EXTEND:
      *total = COSTS_N_INSNS (ix86_cost->movsx);
      return false;

    case ASHIFT:
      if (GET_CODE (XEXP (x, 1)) == CONST_INT
          && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
        {
          HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
          if (value == 1)
            {
              *total = COSTS_N_INSNS (ix86_cost->add);
              return false;
            }
          if ((value == 2 || value == 3)
              && ix86_cost->lea <= ix86_cost->shift_const)
            {
              *total = COSTS_N_INSNS (ix86_cost->lea);
              return false;
            }
        }
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
        {
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            {
              if (INTVAL (XEXP (x, 1)) > 32)
                *total = COSTS_N_INSNS (ix86_cost->shift_const + 2);
              else
                *total = COSTS_N_INSNS (ix86_cost->shift_const * 2);
            }
          else
            {
              if (GET_CODE (XEXP (x, 1)) == AND)
                *total = COSTS_N_INSNS (ix86_cost->shift_var * 2);
              else
                *total = COSTS_N_INSNS (ix86_cost->shift_var * 6 + 2);
            }
        }
      else
        {
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            *total = COSTS_N_INSNS (ix86_cost->shift_const);
          else
            *total = COSTS_N_INSNS (ix86_cost->shift_var);
        }
      return false;

    case MULT:
      if (FLOAT_MODE_P (mode))
        {
          *total = COSTS_N_INSNS (ix86_cost->fmul);
          return false;
        }
      else
        {
          rtx op0 = XEXP (x, 0);
          rtx op1 = XEXP (x, 1);
          int nbits;
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            {
              unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
              for (nbits = 0; value != 0; value &= value - 1)
                nbits++;
            }
          else
            /* This is arbitrary.  */
            nbits = 7;

          /* Compute costs correctly for widening multiplication.  */
          if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
              && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
                 == GET_MODE_SIZE (mode))
            {
              int is_mulwiden = 0;
              enum machine_mode inner_mode = GET_MODE (op0);

              if (GET_CODE (op0) == GET_CODE (op1))
                is_mulwiden = 1, op1 = XEXP (op1, 0);
              else if (GET_CODE (op1) == CONST_INT)
                {
                  if (GET_CODE (op0) == SIGN_EXTEND)
                    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
                                  == INTVAL (op1);
                  else
                    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
                }

              if (is_mulwiden)
                op0 = XEXP (op0, 0), mode = GET_MODE (op0);
            }

          *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
                                  + nbits * ix86_cost->mult_bit)
                   + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);

          return true;
        }

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fdiv);
      else
        *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
      return false;

    case PLUS:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fadd);
      else if (GET_MODE_CLASS (mode) == MODE_INT
               && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
        {
          if (GET_CODE (XEXP (x, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
              && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
              && CONSTANT_P (XEXP (x, 1)))
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = COSTS_N_INSNS (ix86_cost->lea);
                  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
                  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
                                      outer_code);
                  *total += rtx_cost (XEXP (x, 1), outer_code);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == MULT
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = COSTS_N_INSNS (ix86_cost->lea);
                  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
                  *total += rtx_cost (XEXP (x, 1), outer_code);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == PLUS)
            {
              *total = COSTS_N_INSNS (ix86_cost->lea);
              *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
              *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
              *total += rtx_cost (XEXP (x, 1), outer_code);
              return true;
            }
        }
      /* FALLTHRU */

    case MINUS:
      if (FLOAT_MODE_P (mode))
        {
          *total = COSTS_N_INSNS (ix86_cost->fadd);
          return false;
        }
      /* FALLTHRU */

    case AND:
    case IOR:
    case XOR:
      if (!TARGET_64BIT && mode == DImode)
        {
          *total = (COSTS_N_INSNS (ix86_cost->add) * 2
                    + (rtx_cost (XEXP (x, 0), outer_code)
                       << (GET_MODE (XEXP (x, 0)) != DImode))
                    + (rtx_cost (XEXP (x, 1), outer_code)
                       << (GET_MODE (XEXP (x, 1)) != DImode)));
          return true;
        }
      /* FALLTHRU */

    case NEG:
      if (FLOAT_MODE_P (mode))
        {
          *total = COSTS_N_INSNS (ix86_cost->fchs);
          return false;
        }
      /* FALLTHRU */

    case NOT:
      if (!TARGET_64BIT && mode == DImode)
        *total = COSTS_N_INSNS (ix86_cost->add * 2);
      else
        *total = COSTS_N_INSNS (ix86_cost->add);
      return false;

    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
          && XEXP (XEXP (x, 0), 1) == const1_rtx
          && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
          && XEXP (x, 1) == const0_rtx)
        {
          /* This kind of construct is implemented using test[bwl].
             Treat it as if we had an AND.  */
          *total = (COSTS_N_INSNS (ix86_cost->add)
                    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
                    + rtx_cost (const1_rtx, outer_code));
          return true;
        }
      return false;

    case FLOAT_EXTEND:
      if (!TARGET_SSE_MATH
          || mode == XFmode
          || (mode == DFmode && !TARGET_SSE2))
        *total = 0;
      return false;

    case ABS:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fabs);
      return false;

    case SQRT:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fsqrt);
      return false;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TP)
        *total = 0;
      return false;

    default:
      return false;
    }
}
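
/* Illustrative example (added comment, not original source): for
   (mult:SI (reg:SI) (const_int 5)), nbits is 2 (two set bits in 5),
   so the cost computed above is
   COSTS_N_INSNS (mult_init[MODE_INDEX (SImode)] + 2 * mult_bit)
   plus the costs of the two operands.  */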

#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
#endif /* TARGET_MACHO */

/* Order the registers for the register allocator.  */

void
x86_order_regs_for_local_alloc (void)
{
   int pos = 0;
   int i;

   /* First allocate the local general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && call_used_regs[i])
        reg_alloc_order [pos++] = i;

   /* Global general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
        reg_alloc_order [pos++] = i;

   /* x87 registers come first in case we are doing FP math
      using them.  */
   if (!TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   /* SSE registers.  */
   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
     reg_alloc_order [pos++] = i;
   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
     reg_alloc_order [pos++] = i;

   /* x87 registers.  */
   if (TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Initialize the rest of the array, as we do not allocate some
      registers at all.  */
   while (pos < FIRST_PSEUDO_REGISTER)
     reg_alloc_order [pos++] = 0;
}

/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */
static tree
ix86_handle_struct_attribute (tree *node, tree name,
                              tree args ATTRIBUTE_UNUSED,
                              int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
        type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
                 || TREE_CODE (*type) == UNION_TYPE)))
    {
      warning (OPT_Wattributes, "%qs attribute ignored",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  else if ((is_attribute_p ("ms_struct", name)
            && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
           || ((is_attribute_p ("gcc_struct", name)
                && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning (OPT_Wattributes, "%qs incompatible attribute ignored",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
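
/* Illustrative usage (added comment, not original source): the handler
   above fires for declarations such as

     struct s { char c; double d; } __attribute__ ((ms_struct));

   and rejects the attribute when applied to non-record types or when
   combined with the conflicting gcc_struct attribute.  */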

static bool
ix86_ms_bitfield_layout_p (tree record_type)
{
  return (TARGET_MS_BITFIELD_LAYOUT &&
          !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
    || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
}

/* Return an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);

  if (TARGET_64BIT)
    {
      int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
    }

  if (ix86_function_regparm (type, function) > 0)
    {
      tree parm;

      parm = TYPE_ARG_TYPES (type);
      /* Figure out whether or not the function has a variable number of
         arguments.  */
      for (; parm; parm = TREE_CHAIN (parm))
        if (TREE_VALUE (parm) == void_type_node)
          break;
      /* If not, the this parameter is in the first argument register.  */
      if (parm)
        {
          int regno = 0;
          if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
            regno = 2;
          return gen_rtx_REG (SImode, regno);
        }
    }

  if (aggregate_value_p (TREE_TYPE (type), type))
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
  else
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
}
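
/* Illustrative examples (added comment, not original source): on ia32,
   a regparm function receives this in %eax (regno 0) and a fastcall
   function in %ecx (regno 2); otherwise this lives at 4(%esp), or at
   8(%esp) when the hidden aggregate-return pointer occupies the first
   stack slot.  */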

/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
                         HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
                         HOST_WIDE_INT vcall_offset, tree function)
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !(*targetm.binds_local_p) (function))
    return false;

  /* Otherwise ok.  */
  return true;
}
16819
 
16820
/* Output the assembler code for a thunk function.  THUNK_DECL is the
16821
   declaration for the thunk function itself, FUNCTION is the decl for
16822
   the target function.  DELTA is an immediate constant offset to be
16823
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
16824
   *(*this + vcall_offset) should be added to THIS.  */
16825
 
16826
static void
16827
x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
16828
                     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
16829
                     HOST_WIDE_INT vcall_offset, tree function)
16830
{
16831
  rtx xops[3];
16832
  rtx this = x86_this_parameter (function);
16833
  rtx this_reg, tmp;
16834
 
16835
  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
16836
     pull it in now and let DELTA benefit.  */
16837
  if (REG_P (this))
16838
    this_reg = this;
16839
  else if (vcall_offset)
16840
    {
16841
      /* Put the this parameter into %eax.  */
16842
      xops[0] = this;
16843
      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
16844
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
16845
    }
16846
  else
16847
    this_reg = NULL_RTX;
16848
 
16849
  /* Adjust the this parameter by a fixed constant.  */
16850
  if (delta)
16851
    {
16852
      xops[0] = GEN_INT (delta);
16853
      xops[1] = this_reg ? this_reg : this;
16854
      if (TARGET_64BIT)
16855
        {
16856
          if (!x86_64_general_operand (xops[0], DImode))
16857
            {
16858
              tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
16859
              xops[1] = tmp;
16860
              output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
16861
              xops[0] = tmp;
16862
              xops[1] = this;
16863
            }
16864
          output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
16865
        }
16866
      else
16867
        output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
16868
    }
16869
 
16870
  /* Adjust the this parameter by a value stored in the vtable.  */
16871
  if (vcall_offset)
16872
    {
16873
      if (TARGET_64BIT)
16874
        tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
16875
      else
16876
        {
16877
          int tmp_regno = 2 /* ECX */;
16878
          if (lookup_attribute ("fastcall",
16879
              TYPE_ATTRIBUTES (TREE_TYPE (function))))
16880
            tmp_regno = 0 /* EAX */;
16881
          tmp = gen_rtx_REG (SImode, tmp_regno);
16882
        }
16883
 
16884
      xops[0] = gen_rtx_MEM (Pmode, this_reg);
16885
      xops[1] = tmp;
16886
      if (TARGET_64BIT)
16887
        output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
16888
      else
16889
        output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
16890
 
16891
      /* Adjust the this parameter.  */
16892
      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
16893
      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
16894
        {
16895
          rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
16896
          xops[0] = GEN_INT (vcall_offset);
16897
          xops[1] = tmp2;
16898
          output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
16899
          xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
16900
        }
16901
      xops[1] = this_reg;
16902
      if (TARGET_64BIT)
16903
        output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
16904
      else
16905
        output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
16906
    }
16907
 
16908
  /* If necessary, drop THIS back to its stack slot.  */
16909
  if (this_reg && this_reg != this)
16910
    {
16911
      xops[0] = this_reg;
16912
      xops[1] = this;
16913
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
16914
    }
16915
 
16916
  xops[0] = XEXP (DECL_RTL (function), 0);
16917
  if (TARGET_64BIT)
16918
    {
16919
      if (!flag_pic || (*targetm.binds_local_p) (function))
16920
        output_asm_insn ("jmp\t%P0", xops);
16921
      else
16922
        {
16923
          tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
16924
          tmp = gen_rtx_CONST (Pmode, tmp);
16925
          tmp = gen_rtx_MEM (QImode, tmp);
16926
          xops[0] = tmp;
16927
          output_asm_insn ("jmp\t%A0", xops);
16928
        }
16929
    }
16930
  else
16931
    {
16932
      if (!flag_pic || (*targetm.binds_local_p) (function))
16933
        output_asm_insn ("jmp\t%P0", xops);
16934
      else
16935
#if TARGET_MACHO
16936
        if (TARGET_MACHO)
16937
          {
16938
            rtx sym_ref = XEXP (DECL_RTL (function), 0);
16939
            tmp = (gen_rtx_SYMBOL_REF
16940
                   (Pmode,
16941
                    machopic_indirection_name (sym_ref, /*stub_p=*/true)));
16942
            tmp = gen_rtx_MEM (QImode, tmp);
16943
            xops[0] = tmp;
16944
            output_asm_insn ("jmp\t%0", xops);
16945
          }
16946
        else
16947
#endif /* TARGET_MACHO */
16948
        {
16949
          tmp = gen_rtx_REG (SImode, 2 /* ECX */);
16950
          output_set_got (tmp);
16951
 
16952
          xops[1] = tmp;
16953
          output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
16954
          output_asm_insn ("jmp\t{*}%1", xops);
16955
        }
16956
    }
16957
}
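
/* Illustrative sketch (added comment, not original source): for a
   non-PIC ia32 thunk with DELTA == 4, no vcall offset, and this on
   the stack, the output amounts to

     addl $4, 4(%esp)
     jmp  target

   adjusting the this pointer in place and tail-jumping to FUNCTION.  */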

static void
x86_file_start (void)
{
  default_file_start ();
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax\n", asm_out_file);
}

int
x86_field_alignment (tree field, int computed)
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
                    ? get_inner_array_type (type) : type);
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
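
/* Illustrative example (added comment, not original source): on ia32
   without -malign-double, a double (DFmode) structure field is capped
   at 32-bit alignment by the MIN above, matching the traditional
   System V ia32 struct layout.  */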

/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  if (TARGET_64BIT)
    if (flag_pic)
      {
#ifndef NO_PROFILE_COUNTERS
        fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
#endif
        fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
      }
    else
      {
#ifndef NO_PROFILE_COUNTERS
        fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
#endif
        fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
      }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
               LPREFIX, labelno, PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
               PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
    }
}
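
/* Illustrative example (added comment, not original source): for a
   non-PIC ia32 build this emits, per function entry, something like

     movl $.LP0, %edx
     call mcount

   where the counter register and the mcount symbol name are given by
   the target's PROFILE_COUNT_REGISTER and MCOUNT_NAME.  */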

/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  */

static int
min_insn_size (rtx insn)
{
  int l = 0;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emitted, and jump tables.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;
  if (GET_CODE (insn) == JUMP_INSN
      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
          || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in a row.  */
  if (GET_CODE (insn) == CALL_INSN
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  if (get_attr_length (insn) <= 1)
    return 1;

  /* For normal instructions we may rely on the sizes of addresses
     and the presence of a symbol to require 4 bytes of encoding.
     This is not the case for jumps, where references are PC relative.  */
  if (GET_CODE (insn) != JUMP_INSN)
    {
      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
        l = 4;
    }
  if (l)
    return 1 + l;
  else
    return 2;
}

/* The AMD K8 core mispredicts jumps when there are more than 3 jumps in
   a 16 byte window.  */

static void
ix86_avoid_jump_misspredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When NBYTES is smaller than 16 bytes, it is possible
     that START and INSN end up in the same 16 byte window.

     The smallest offset in the window at which INSN can start is the case
     where START ends at offset 0.  The offset of INSN is then
     NBYTES - sizeof (INSN).  We add a p2align to the 16 byte window; the
     pad amount computed below is 15 - NBYTES + sizeof (INSN).  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {

      nbytes += min_insn_size (insn);
      if (dump_file)
        fprintf (dump_file, "Insn %i estimated to %i bytes\n",
                 INSN_UID (insn), min_insn_size (insn));
      if ((GET_CODE (insn) == JUMP_INSN
           && GET_CODE (PATTERN (insn)) != ADDR_VEC
           && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
          || GET_CODE (insn) == CALL_INSN)
        njumps++;
      else
        continue;

      while (njumps > 3)
        {
          start = NEXT_INSN (start);
          if ((GET_CODE (start) == JUMP_INSN
               && GET_CODE (PATTERN (start)) != ADDR_VEC
               && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
              || GET_CODE (start) == CALL_INSN)
            njumps--, isjump = 1;
          else
            isjump = 0;
          nbytes -= min_insn_size (start);
        }
      gcc_assert (njumps >= 0);
      if (dump_file)
        fprintf (dump_file, "Interval %i to %i has %i bytes\n",
                INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
        {
          int padsize = 15 - nbytes + min_insn_size (insn);

          if (dump_file)
            fprintf (dump_file, "Padding insn %i by %i bytes!\n",
                     INSN_UID (insn), padsize);
          emit_insn_before (gen_align (GEN_INT (padsize)), insn);
        }
    }
}

/* AMD Athlon works faster when RET is not the destination of a
   conditional jump or directly preceded by another jump instruction.
   We avoid the penalty by inserting a NOP just before the RET
   instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      rtx prev;
      bool replace = false;

      if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
          || !maybe_hot_bb_p (bb))
        continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
        if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
          break;
      if (prev && GET_CODE (prev) == CODE_LABEL)
        {
          edge e;
          edge_iterator ei;

          FOR_EACH_EDGE (e, ei, bb->preds)
            if (EDGE_FREQUENCY (e) && e->src->index >= 0
                && !(e->flags & EDGE_FALLTHRU))
              replace = true;
        }
      if (!replace)
        {
          prev = prev_active_insn (ret);
          if (prev
              && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
                  || GET_CODE (prev) == CALL_INSN))
            replace = true;
          /* Empty functions get a branch mispredict even when the jump
             destination is not visible to us.  */
          if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
            replace = true;
        }
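      /* Replace the plain RET with the longer return emitted by
         return_internal_long (a "rep"-prefixed ret on this target),
         which does not suffer the prediction penalty.  */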
      if (replace)
        {
          emit_insn_before (gen_return_internal_long (), ret);
          delete_insn (ret);
        }
    }
}

/* Implement machine specific optimizations.  We implement padding of
   returns for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte
   window.  */
static void
ix86_reorg (void)
{
  if (TARGET_ATHLON_K8 && optimize && !optimize_size)
    ix86_pad_returns ();
  if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
    ix86_avoid_jump_misspredicts ();
}

/* Return nonzero when a QImode register that must be represented via a
   REX prefix is used.  */
bool
x86_extended_QIreg_mentioned_p (rtx insn)
{
  int i;
  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (REG_P (recog_data.operand[i])
        && REGNO (recog_data.operand[i]) >= 4)
      return true;
  return false;
}

/* Return nonzero when P points to a register encoded via a REX prefix.
   Called via for_each_rtx.  */
static int
extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno;
  if (!REG_P (*p))
    return 0;
  regno = REGNO (*p);
  return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}

/* Return true when INSN mentions a register that must be encoded using
   a REX prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
}

/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  i1 = gen_reg_rtx (Pmode);
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);
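
  /* The value compares as negative, i.e. its top bit is set: compute
     (IN >> 1) | (IN & 1), halving the value while folding the lost low
     bit back in so the final rounding is unaffected, convert that as a
     signed value, and double the result.  */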
  i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
  expand_float (f0, i0, 0);
  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}

/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAL.  Return true if successful.  */

static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
                                   rtx target, rtx val)
{
  enum machine_mode smode, wsmode, wvmode;
  rtx x;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok && !TARGET_SSE)
        return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      val = force_reg (GET_MODE_INNER (mode), val);
      x = gen_rtx_VEC_DUPLICATE (mode, val);
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4HImode:
      if (!mmx_ok)
        return false;
      if (TARGET_SSE || TARGET_3DNOW_A)
        {
          val = gen_lowpart (SImode, val);
          x = gen_rtx_TRUNCATE (HImode, val);
          x = gen_rtx_VEC_DUPLICATE (mode, x);
          emit_insn (gen_rtx_SET (VOIDmode, target, x));
          return true;
        }
      else
        {
          smode = HImode;
          wsmode = SImode;
          wvmode = V2SImode;
          goto widen;
        }

    case V8QImode:
      if (!mmx_ok)
        return false;
      smode = QImode;
      wsmode = HImode;
      wvmode = V4HImode;
      goto widen;
    case V8HImode:
      smode = HImode;
      wsmode = SImode;
      wvmode = V4SImode;
      goto widen;
    case V16QImode:
      smode = QImode;
      wsmode = HImode;
      wvmode = V8HImode;
      goto widen;
    widen:
      /* Replicate the value once into the next wider mode and recurse.  */
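      /* E.g. a QImode value 0xAB is zero-extended to HImode, shifted
         left by 8 and IOR'ed with itself to give 0xABAB, which the
         recursive call then broadcasts in the wider vector mode.  */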
      val = convert_modes (wsmode, smode, val, true);
      x = expand_simple_binop (wsmode, ASHIFT, val,
                               GEN_INT (GET_MODE_BITSIZE (smode)),
                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
      val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wvmode);
      if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
        gcc_unreachable ();
      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}

/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose low element is VAR, and other elements are zero.  Return true
   if successful.  */

static bool
ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
                                     rtx target, rtx var)
{
  enum machine_mode vsimode;
  rtx x;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
        return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4SFmode:
    case V4SImode:
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V8HImode:
    case V16QImode:
      vsimode = V4SImode;
      goto widen;
    case V4HImode:
    case V8QImode:
      if (!mmx_ok)
        return false;
      vsimode = V2SImode;
      goto widen;
    widen:
      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_low_nonzero (mmx_ok, vsimode, x, var))
        gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}

/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
                                 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  const_vec = copy_rtx (vals);
  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
    case V2SFmode:
    case V2SImode:
      /* For the two element vectors, it's just as easy to use
         the general case.  */
      return false;

    case V4SFmode:
    case V4SImode:
    case V8HImode:
    case V4HImode:
      break;

    case V16QImode:
      wmode = V8HImode;
      goto widen;
    case V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
         the variable value with its adjacent constant value, and
         promote to an HImode set.  */
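      /* On this little-endian target, an odd ONE_VAR occupies the high
         byte of its HImode pair, so the variable byte is shifted left
         by 8 and combined with the low constant byte; an even ONE_VAR
         stays in the low byte and the constant neighbor is shifted up.
         The merged value is then inserted at position ONE_VAR >> 1.  */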
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
        {
          var = convert_modes (HImode, QImode, var, true);
          var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
                                     NULL_RTX, 1, OPTAB_LIB_WIDEN);
          x = GEN_INT (INTVAL (x) & 0xff);
        }
      else
        {
          var = convert_modes (HImode, QImode, var, true);
          x = gen_int_mode (INTVAL (x) << 8, HImode);
        }
      if (x != const0_rtx)
        var = expand_simple_binop (HImode, IOR, var, x, var,
                                   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}

/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
                                 rtx target, rtx vals)
{
  enum machine_mode half_mode = GET_MODE_INNER (mode);
  rtx op0 = NULL, op1 = NULL;
  bool use_vec_concat = false;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
        break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      /* For the two element vectors, we always implement VEC_CONCAT.  */
      op0 = XVECEXP (vals, 0, 0);
      op1 = XVECEXP (vals, 0, 1);
      use_vec_concat = true;
      break;

    case V4SFmode:
      half_mode = V2SFmode;
      goto half;
    case V4SImode:
      half_mode = V2SImode;
      goto half;
    half:
      {
        rtvec v;

        /* For V4SF and V4SI, we implement a concat of two V2 vectors.
           Recurse to load the two halves.  */

        op0 = gen_reg_rtx (half_mode);
        v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
        ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));

        op1 = gen_reg_rtx (half_mode);
        v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
        ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));

        use_vec_concat = true;
      }
      break;

    case V8HImode:
    case V16QImode:
    case V4HImode:
    case V8QImode:
      break;

    default:
      gcc_unreachable ();
    }

  if (use_vec_concat)
    {
      if (!register_operand (op0, half_mode))
        op0 = force_reg (half_mode, op0);
      if (!register_operand (op1, half_mode))
        op1 = force_reg (half_mode, op1);

      emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_rtx_VEC_CONCAT (mode, op0, op1)));
    }
  else
    {
      int i, j, n_elts, n_words, n_elt_per_word;
      enum machine_mode inner_mode;
      rtx words[4], shift;

      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
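
      /* Pack each word starting from its highest-indexed element:
         repeatedly shift the accumulated word left by one element and
         IOR in the next lower element, so element 0 ends up in the
         least significant bits, matching little-endian lane order.  */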
      for (i = 0; i < n_words; ++i)
        {
          rtx word = NULL_RTX;

          for (j = 0; j < n_elt_per_word; ++j)
            {
              rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
              elt = convert_modes (word_mode, inner_mode, elt, true);

              if (j == 0)
                word = elt;
              else
                {
                  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
                                              word, 1, OPTAB_LIB_WIDEN);
                  word = expand_simple_binop (word_mode, IOR, word, elt,
                                              word, 1, OPTAB_LIB_WIDEN);
                }
            }

          words[i] = word;
        }

      if (n_words == 1)
        emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)
        {
          rtx tmp = gen_reg_rtx (mode);
          emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
          emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
          emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
          emit_move_insn (target, tmp);
        }
      else if (n_words == 4)
        {
          rtx tmp = gen_reg_rtx (V4SImode);
          vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
          ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
          emit_move_insn (target, gen_lowpart (mode, tmp));
        }
      else
        gcc_unreachable ();
    }
}

/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
        n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
        all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
        all_same = false;
    }

  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
                                            XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via a move later.  */
  if (n_var == 1)
    {
      if (all_const_zero && one_var == 0
          && ix86_expand_vector_init_low_nonzero (mmx_ok, mode, target,
                                                  XVECEXP (vals, 0, 0)))
        return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
        return;
    }

  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}

void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_merge = false;
  rtx tmp;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (mmx_ok)
        {
          tmp = gen_reg_rtx (GET_MODE_INNER (mode));
          ix86_expand_vector_extract (true, tmp, target, 1 - elt);
          if (elt == 0)
            tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
          else
            tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
          emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
          return;
        }
      break;

    case V2DFmode:
    case V2DImode:
      {
        rtx op0, op1;

        /* For the two element vectors, we implement a VEC_CONCAT with
           the extraction of the other element.  */

        tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
        tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

        if (elt == 0)
          op0 = val, op1 = tmp;
        else
          op0 = tmp, op1 = val;

        tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
        emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      switch (elt)
        {
        case 0:
          use_vec_merge = true;
          break;

        case 1:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* target = A A B B */
          emit_insn (gen_sse_unpcklps (target, target, target));
          /* target = X A B B */
          ix86_expand_vector_set (false, target, val, 0);
          /* target = A X C D */
          emit_insn (gen_sse_shufps_1 (target, target, tmp,
                                       GEN_INT (1), GEN_INT (0),
                                       GEN_INT (2+4), GEN_INT (3+4)));
          return;

        case 2:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* tmp = X B C D */
          ix86_expand_vector_set (false, tmp, val, 0);
          /* target = A B X D */
          emit_insn (gen_sse_shufps_1 (target, target, tmp,
                                       GEN_INT (0), GEN_INT (1),
                                       GEN_INT (0+4), GEN_INT (3+4)));
          return;

        case 3:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* tmp = X B C D */
          ix86_expand_vector_set (false, tmp, val, 0);
          /* target = A B C X */
          emit_insn (gen_sse_shufps_1 (target, target, tmp,
                                       GEN_INT (0), GEN_INT (1),
                                       GEN_INT (2+4), GEN_INT (0+4)));
          return;

        default:
          gcc_unreachable ();
        }
      break;

    case V4SImode:
      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
        {
          use_vec_merge = true;
          break;
        }

      if (TARGET_SSE2)
        {
          /* With SSE2, use integer shuffles to swap element 0 and ELT,
             store into element 0, then shuffle them back.  */
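
          /* For ELT == 2, for example, the permutation is {2, 1, 0, 3}:
             applying it once swaps lanes 0 and 2, and applying the same
             permutation a second time restores the original order.  */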

          rtx order[4];

          order[0] = GEN_INT (elt);
          order[1] = const1_rtx;
          order[2] = const2_rtx;
          order[3] = GEN_INT (3);
          order[elt] = const0_rtx;

          emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
                                        order[1], order[2], order[3]));

          ix86_expand_vector_set (false, target, val, 0);

          emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
                                        order[1], order[2], order[3]));
        }
      else
        {
          /* For SSE1, we have to reuse the V4SF code.  */
          ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
                                  gen_lowpart (SFmode, val), elt);
        }
      return;

    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
    case V8QImode:
    default:
      break;
    }

  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}

void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
        break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;

    case V4SFmode:
      switch (elt)
        {
        case 0:
          tmp = vec;
          break;

        case 1:
        case 3:
          tmp = gen_reg_rtx (mode);
          emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
                                       GEN_INT (elt), GEN_INT (elt),
                                       GEN_INT (elt+4), GEN_INT (elt+4)));
          break;

        case 2:
          tmp = gen_reg_rtx (mode);
          emit_insn (gen_sse_unpckhps (tmp, vec, vec));
          break;

        default:
          gcc_unreachable ();
        }
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      if (TARGET_SSE2)
        {
          switch (elt)
            {
            case 0:
              tmp = vec;
              break;

            case 1:
            case 3:
              tmp = gen_reg_rtx (mode);
              emit_insn (gen_sse2_pshufd_1 (tmp, vec,
                                            GEN_INT (elt), GEN_INT (elt),
                                            GEN_INT (elt), GEN_INT (elt)));
              break;

            case 2:
              tmp = gen_reg_rtx (mode);
              emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
              break;

            default:
              gcc_unreachable ();
            }
          vec = tmp;
          use_vec_extr = true;
          elt = 0;
        }
      else
        {
          /* For SSE1, we have to reuse the V4SF code.  */
          ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
                                      gen_lowpart (V4SFmode, vec), elt);
          return;
        }
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == HImode)
        {
          tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
          target = gen_lowpart (SImode, target);
        }

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}

/* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
   pattern to reduce; DEST is the destination; IN is the input vector.  */
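
/* For example, with FN generating additions and IN = {a, b, c, d}, the
   movhlps/FN step computes {a+c, b+d, ...} in the low lanes, the
   shuffle broadcasts lane 1 (b+d), and the final FN leaves the complete
   reduction a+b+c+d in lane 0 of DEST.  */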

void
ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx tmp1, tmp2, tmp3;

  tmp1 = gen_reg_rtx (V4SFmode);
  tmp2 = gen_reg_rtx (V4SFmode);
  tmp3 = gen_reg_rtx (V4SFmode);

  emit_insn (gen_sse_movhlps (tmp1, in, in));
  emit_insn (fn (tmp2, tmp1, in));

  emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
                               GEN_INT (1), GEN_INT (1),
                               GEN_INT (1+4), GEN_INT (1+4)));
  emit_insn (fn (dest, tmp2, tmp3));
}

/* Implements target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}

/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
                      tree inputs ATTRIBUTE_UNUSED,
                      tree clobbers)
{
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
                        clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
                        clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
                        clobbers);
  return clobbers;
}

/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
          || strcmp (section, ".lbss") == 0)
        return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
         in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
        return true;
    }

  return false;
}
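
/* Flag static and external variables placed in the large data/bss
   sections (see ix86_in_large_data_p) with SYMBOL_FLAG_FAR_ADDR, on
   top of the default section-info encoding.  */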
static void
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}

/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  return (mode != CCFPmode && mode != CCFPUmode
          ? reverse_condition (code)
          : reverse_condition_maybe_unordered (code));
}

/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[1])
      && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
    {
      if (REGNO (operands[0]) == FIRST_STACK_REG
          && TARGET_USE_FFREEP)
        return "ffreep\t%y0";
      return "fstp\t%y0";
    }
  if (STACK_TOP_P (operands[0]))
    return "fld%z1\t%y1";
  return "fst\t%y0";
}

/* Output code to perform a conditional jump to LABEL if the C2 flag in
   the FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_USE_SAHF)
    {
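      /* SAHF copies the high byte of the FP status word into EFLAGS,
         where the C2 bit lands in the parity flag, so an UNORDERED
         test on the flags register tests C2.  */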
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
                               gen_rtx_LABEL_REF (VOIDmode, label),
                               pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
  emit_jump_insn (temp);
}

/* Output code to perform a log1p XFmode calculation.  */
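
/* The threshold 0.29289... used below is 1 - sqrt(2)/2, the magnitude
   bound within which the x87 fyl2xp1 instruction is specified to be
   accurate; larger inputs take the fyl2x path on 1 + OP1 instead.  */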

void
ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  emit_insn (gen_absxf2 (tmp, op1));
  emit_insn (gen_cmpxf (tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode)));
  emit_jump_insn (gen_bge (label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));

  emit_label (label2);
}

/* Solaris named-section hook.  Parameters are as for
   named_section_real.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
                                tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
               flags & SECTION_WRITE ? "aw" : "a");
      return;
    }
  default_elf_asm_named_section (name, flags, decl);
}

/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_fundamental_type (tree type)
{
  switch (TYPE_MODE (type))
    {
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}

/* For 32-bit code we can save PIC register setup by using the
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to set up any
   PIC register, so it is better to call __stack_chk_fail directly.  */

static tree
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
         ? default_external_stack_protect_fail ()
         : default_hidden_stack_protect_fail ();
}

/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */
int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
          || ix86_cmodel == CM_SMALL_PIC
          || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
        type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}

#include "gt-i386.h"
