OpenCores
Subversion repository: openrisc_2011-10-31
URL: https://opencores.org/ocsvn/openrisc_2011-10-31/openrisc_2011-10-31/trunk
File: openrisc/trunk/gnu-src/gcc-4.2.2/gcc/config/i386/i386.c (rev 645)

/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"
#include "tree-gimple.h"
#include "dwarf2.h"
#include "tm-constrs.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)                                        \
  ((mode) == QImode ? 0                                         \
   : (mode) == HImode ? 1                                       \
   : (mode) == SImode ? 2                                       \
   : (mode) == DImode ? 3                                       \
   : 4)

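/* Illustrative sketch (assumption, not from the original source): the
   per-mode arrays in struct processor_costs are indexed with MODE_INDEX,
   along the lines of

       ix86_cost->mult_init[MODE_INDEX (SImode)]

   which would fetch the multiply start-up cost for SImode from whichever
   cost table is currently selected.  */
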
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

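/* Illustrative arithmetic (assumption, not from the original source):
   with COSTS_N_INSNS (N) defined as ((N) * 4), COSTS_N_BYTES (2) == 4
   == COSTS_N_INSNS (1), so a 2-byte add under size tuning is charged
   the same as a one-insn add under speed tuning.  */
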
static const
struct processor_costs size_cost = {    /* costs for tuning for size */
  COSTS_N_BYTES (2),                    /* cost of an add instruction */
  COSTS_N_BYTES (3),                    /* cost of a lea instruction */
  COSTS_N_BYTES (2),                    /* variable shift costs */
  COSTS_N_BYTES (3),                    /* constant shift costs */
  {COSTS_N_BYTES (3),                   /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),                   /*                               HI */
   COSTS_N_BYTES (3),                   /*                               SI */
   COSTS_N_BYTES (3),                   /*                               DI */
   COSTS_N_BYTES (5)},                  /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),                   /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),                   /*                          HI */
   COSTS_N_BYTES (3),                   /*                          SI */
   COSTS_N_BYTES (3),                   /*                          DI */
   COSTS_N_BYTES (5)},                  /*                       other */
  COSTS_N_BYTES (3),                    /* cost of movsx */
  COSTS_N_BYTES (3),                    /* cost of movzx */
  0,                                    /* "large" insn */
  2,                                    /* MOVE_RATIO */
  2,                                    /* cost for loading QImode using movzbl */
  {2, 2, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 2, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 2},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {2, 2, 2},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  3,                                    /* cost of moving MMX register */
  {3, 3},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {3, 3},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  3,                                    /* cost of moving SSE register */
  {3, 3, 3},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {3, 3, 3},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_BYTES (2),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),                    /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),                    /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),                    /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),                    /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),                    /* cost of FSQRT instruction.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {    /* 386 specific costs */
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (3),                    /* variable shift costs */
  COSTS_N_INSNS (2),                    /* constant shift costs */
  {COSTS_N_INSNS (6),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),                   /*                               HI */
   COSTS_N_INSNS (6),                   /*                               SI */
   COSTS_N_INSNS (6),                   /*                               DI */
   COSTS_N_INSNS (6)},                  /*                               other */
  COSTS_N_INSNS (1),                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),                  /*                          HI */
   COSTS_N_INSNS (23),                  /*                          SI */
   COSTS_N_INSNS (23),                  /*                          DI */
   COSTS_N_INSNS (23)},                 /*                          other */
  COSTS_N_INSNS (3),                    /* cost of movsx */
  COSTS_N_INSNS (2),                    /* cost of movzx */
  15,                                   /* "large" insn */
  3,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {2, 4, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 4, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {8, 8, 8},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {8, 8, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {4, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 8, 16},                           /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 8, 16},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  COSTS_N_INSNS (23),                   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),                   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),                   /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),                   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),                  /* cost of FSQRT instruction.  */
};

static const
struct processor_costs i486_cost = {    /* 486 specific costs */
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (3),                    /* variable shift costs */
  COSTS_N_INSNS (2),                    /* constant shift costs */
  {COSTS_N_INSNS (12),                  /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),                  /*                               HI */
   COSTS_N_INSNS (12),                  /*                               SI */
   COSTS_N_INSNS (12),                  /*                               DI */
   COSTS_N_INSNS (12)},                 /*                               other */
  1,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),                  /*                          HI */
   COSTS_N_INSNS (40),                  /*                          SI */
   COSTS_N_INSNS (40),                  /*                          DI */
   COSTS_N_INSNS (40)},                 /*                          other */
  COSTS_N_INSNS (3),                    /* cost of movsx */
  COSTS_N_INSNS (2),                    /* cost of movzx */
  15,                                   /* "large" insn */
  3,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {2, 4, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 4, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {8, 8, 8},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {8, 8, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {4, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 8, 16},                           /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 8, 16},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),                   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),                   /* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (4),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (11),                  /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),                  /*                               HI */
   COSTS_N_INSNS (11),                  /*                               SI */
   COSTS_N_INSNS (11),                  /*                               DI */
   COSTS_N_INSNS (11)},                 /*                               other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),                  /*                          HI */
   COSTS_N_INSNS (25),                  /*                          SI */
   COSTS_N_INSNS (25),                  /*                          DI */
   COSTS_N_INSNS (25)},                 /*                          other */
  COSTS_N_INSNS (3),                    /* cost of movsx */
  COSTS_N_INSNS (2),                    /* cost of movzx */
  8,                                    /* "large" insn */
  6,                                    /* MOVE_RATIO */
  6,                                    /* cost for loading QImode using movzbl */
  {2, 4, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 4, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 6},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  8,                                    /* cost of moving MMX register */
  {8, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {8, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 8, 16},                           /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 8, 16},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (3),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),                   /* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (4),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /*                               HI */
   COSTS_N_INSNS (4),                   /*                               SI */
   COSTS_N_INSNS (4),                   /*                               DI */
   COSTS_N_INSNS (4)},                  /*                               other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),                  /*                          HI */
   COSTS_N_INSNS (17),                  /*                          SI */
   COSTS_N_INSNS (17),                  /*                          DI */
   COSTS_N_INSNS (17)},                 /*                          other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  6,                                    /* MOVE_RATIO */
  2,                                    /* cost for loading QImode using movzbl */
  {4, 4, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 2, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 6},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {2, 2},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {2, 2},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {2, 2, 8},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {2, 2, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  32,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (3),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),                   /* cost of FSQRT instruction.  */
};

static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),                   /*                               HI */
   COSTS_N_INSNS (3),                   /*                               SI */
   COSTS_N_INSNS (3),                   /*                               DI */
   COSTS_N_INSNS (3)},                  /*                               other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),                  /*                          HI */
   COSTS_N_INSNS (18),                  /*                          SI */
   COSTS_N_INSNS (18),                  /*                          DI */
   COSTS_N_INSNS (18)},                 /*                          other */
  COSTS_N_INSNS (2),                    /* cost of movsx */
  COSTS_N_INSNS (2),                    /* cost of movzx */
  8,                                    /* "large" insn */
  4,                                    /* MOVE_RATIO */
  3,                                    /* cost for loading QImode using movzbl */
  {4, 5, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 3, 2},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {6, 6, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 4},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {2, 2},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {2, 2},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {2, 2, 8},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {2, 2, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  6,                                    /* MMX or SSE register to integer */
  32,                                   /* size of prefetch block */
  1,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  COSTS_N_INSNS (2),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),                   /* cost of FSQRT instruction.  */
};

static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (5),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),                   /*                               HI */
   COSTS_N_INSNS (5),                   /*                               SI */
   COSTS_N_INSNS (5),                   /*                               DI */
   COSTS_N_INSNS (5)},                  /*                               other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),                  /*                          HI */
   COSTS_N_INSNS (42),                  /*                          SI */
   COSTS_N_INSNS (74),                  /*                          DI */
   COSTS_N_INSNS (74)},                 /*                          other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  9,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {3, 4, 3},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {3, 4, 3},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {4, 4, 12},                           /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {4, 4},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 4, 6},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 5},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  5,                                    /* MMX or SSE register to integer */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  5,                                    /* Branch cost */
  COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */
};

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /*                               HI */
   COSTS_N_INSNS (3),                   /*                               SI */
   COSTS_N_INSNS (4),                   /*                               DI */
   COSTS_N_INSNS (5)},                  /*                               other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),                  /*                          HI */
   COSTS_N_INSNS (42),                  /*                          SI */
   COSTS_N_INSNS (74),                  /*                          DI */
   COSTS_N_INSNS (74)},                 /*                          other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  9,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {3, 4, 3},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {3, 4, 3},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {4, 4, 12},                           /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {3, 3},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 3, 6},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 5},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  5,                                    /* MMX or SSE register to integer */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  5,                                    /* Branch cost */
  COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (3),                    /* cost of a lea instruction */
  COSTS_N_INSNS (4),                    /* variable shift costs */
  COSTS_N_INSNS (4),                    /* constant shift costs */
  {COSTS_N_INSNS (15),                  /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),                  /*                               HI */
   COSTS_N_INSNS (15),                  /*                               SI */
   COSTS_N_INSNS (15),                  /*                               DI */
   COSTS_N_INSNS (15)},                 /*                               other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),                  /*                          HI */
   COSTS_N_INSNS (56),                  /*                          SI */
   COSTS_N_INSNS (56),                  /*                          DI */
   COSTS_N_INSNS (56)},                 /*                          other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  16,                                   /* "large" insn */
  6,                                    /* MOVE_RATIO */
  2,                                    /* cost for loading QImode using movzbl */
  {4, 5, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 3, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 6},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {2, 2},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {2, 2},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  12,                                   /* cost of moving SSE register */
  {12, 12, 12},                         /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {2, 2, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  10,                                   /* MMX or SSE register to integer */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (5),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),                   /* cost of FSQRT instruction.  */
};

static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (10),                  /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),                  /*                               HI */
   COSTS_N_INSNS (10),                  /*                               SI */
   COSTS_N_INSNS (10),                  /*                               DI */
   COSTS_N_INSNS (10)},                 /*                               other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),                  /*                          HI */
   COSTS_N_INSNS (66),                  /*                          SI */
   COSTS_N_INSNS (66),                  /*                          DI */
   COSTS_N_INSNS (66)},                 /*                          other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  16,                                   /* "large" insn */
  17,                                   /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {4, 4, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {4, 4, 4},                            /* cost of storing integer registers */
  3,                                    /* cost of reg,reg fld/fst */
  {12, 12, 12},                         /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 4},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  6,                                    /* cost of moving MMX register */
  {12, 12},                             /* cost of loading MMX registers
                                           in SImode and DImode */
  {12, 12},                             /* cost of storing MMX registers
                                           in SImode and DImode */
  6,                                    /* cost of moving SSE register */
  {12, 12, 12},                         /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {12, 12, 12},                         /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  8,                                    /* MMX or SSE register to integer */
  128,                                  /* size of prefetch block */
  8,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  COSTS_N_INSNS (6),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),                   /* cost of FSQRT instruction.  */
};

/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  /* On all chips taken into consideration, lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results
     in the use of unnecessary temporary registers, causing regressions on
     several SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,                /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /*                               HI */
   COSTS_N_INSNS (3),                   /*                               SI */
   COSTS_N_INSNS (4),                   /*                               DI */
   COSTS_N_INSNS (2)},                  /*                               other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),                  /*                          HI */
   COSTS_N_INSNS (42),                  /*                          SI */
   COSTS_N_INSNS (74),                  /*                          DI */
   COSTS_N_INSNS (74)},                 /*                          other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  17,                                   /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {4, 4, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {4, 4, 4},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {12, 12, 12},                         /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {8, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {8, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {8, 8, 8},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {8, 8, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  5,                                    /* MMX or SSE register to integer */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when
     this value is increased to the perhaps more appropriate value of 5.  */
  3,                                    /* Branch cost */
  COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),                   /* cost of FSQRT instruction.  */
};

/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8.  */
static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,                /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /*                               HI */
   COSTS_N_INSNS (3),                   /*                               SI */
   COSTS_N_INSNS (4),                   /*                               DI */
   COSTS_N_INSNS (2)},                  /*                               other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),                  /*                          HI */
   COSTS_N_INSNS (42),                  /*                          SI */
   COSTS_N_INSNS (74),                  /*                          DI */
   COSTS_N_INSNS (74)},                 /*                          other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  17,                                   /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {4, 4, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {4, 4, 4},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {12, 12, 12},                         /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {8, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {8, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {8, 8, 8},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {8, 8, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  5,                                    /* MMX or SSE register to integer */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  3,                                    /* Branch cost */
  COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),                   /* cost of FSQRT instruction.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;

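/* Illustrative sketch (assumption, not from the original source): this
   default is normally replaced during option processing, roughly

       ix86_cost = processor_target_table[ix86_tune].cost;

   so the table matching the -mtune selection (or size_cost when
   optimizing for size) drives the rest of compilation.  */
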
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6  (1<<PROCESSOR_K6)
#define m_ATHLON  (1<<PROCESSOR_ATHLON)
#define m_PENT4  (1<<PROCESSOR_PENTIUM4)
#define m_K8  (1<<PROCESSOR_K8)
#define m_ATHLON_K8  (m_K8 | m_ATHLON)
#define m_NOCONA  (1<<PROCESSOR_NOCONA)
#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)

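/* Illustrative sketch (assumption, not from the original source): each
   x86_* tuning variable below is a bitmask over these processor bits and
   is tested against the active tuning target, along the lines of

       if (x86_use_leave & (1 << ix86_tune))
         ...

   with i386.h wrapping such tests in TARGET_* convenience macros.  */
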
/* Generic instruction choice should be the common subset of supported CPUs
   (PPro/PENT4/NOCONA/Athlon/K8).  */

/* Using leave does not hurt Nocona SPEC2000 results, so enabling it for
   Generic64 seems like a good code-size tradeoff.  We can't enable it for
   32-bit generic because it does not work well with PPro-based chips.  */
const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_GENERIC64;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
/* Branch hints were put in P4 based on simulation results, but after P4
   was made no performance benefit was observed with branch hints.  They
   also increase code size.  As a result, icc never generates branch
   hints.  */
const int x86_branch_hints = 0;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
/* We probably ought to watch for partial register stalls on the Generic32
   compilation setting as well.  However, in the current implementation
   partial register stalls are not eliminated very well - they can be
   introduced via subregs synthesized by combine and can happen in
   caller/callee saving sequences.
   Because this option pays back little on PPro-based chips and conflicts
   with the partial register dependencies used by Athlon/P4-based chips,
   it is better to leave it off for generic32 for now.  */
const int x86_partial_reg_stall = m_PPRO;
const int x86_partial_flag_reg_stall = m_GENERIC;
const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
/* On PPro this flag is meant to avoid partial register stalls.  Just like
   x86_partial_reg_stall, this option might be considered for Generic32 if
   our scheme for avoiding partial stalls were more effective.  */
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
/* In the Generic model we have a conflict between PPro/Pentium4-based
   chips, which treat 128-bit SSE registers as single units, and K8-based
   chips, which split SSE registers into two 64-bit halves.
   x86_sse_partial_reg_dependency promotes all store destinations to
   128 bits to allow register renaming on 128-bit SSE units, but usually
   results in one extra micro-op on 64-bit SSE units.  Experimental results
   show that disabling this option on P4 brings over a 20% SPECfp
   regression, while enabling it on K8 brings roughly a 2.4% regression
   that can be partly masked by careful scheduling of moves.  */
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
/* Set for machines where types and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just the
   lower part of scalar values in the proper format, leaving the upper
   part undefined.  */
const int x86_sse_split_regs = m_ATHLON_K8;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);

/* ??? Allowing interunit moves makes it all too easy for the compiler to
   put integer data in xmm registers, which results in pretty abysmal
   code.  */
const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;

const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
/* Some CPU cores are not able to predict more than 4 branch instructions
   in the 16-byte window.  */
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_GENERIC;
const int x86_use_bt = m_ATHLON_K8;
/* Compare and exchange was added for the 80486.  */
const int x86_cmpxchg = ~m_386;
/* Compare and exchange 8 bytes was added for the Pentium.  */
const int x86_cmpxchg8b = ~(m_386 | m_486);
/* Compare and exchange 16 bytes was added for Nocona.  */
const int x86_cmpxchg16b = m_NOCONA;
/* Exchange and add was added for the 80486.  */
const int x86_xadd = ~m_386;
const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;

/* If the average insn count for a single function invocation is lower
   than this constant, emit fast (but longer) prologue and epilogue
   code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
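
/* For illustration: REGNO_REG_CLASS simply indexes this table, so
   REGNO_REG_CLASS (0) yields AREG for %eax, while %esp (gcc regno 7)
   falls in NON_Q_REGS because it has no addressable QImode low byte.  */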

/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,               /* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,       /* fp regs */
  -1, -1, -1, -1, -1,                   /* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,       /* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,       /* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended SSE registers */
};

static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};
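
/* Reading the table above: the first integer argument of a SysV x86-64
   call arrives in %rdi (gcc regno 5) and the third in %rdx (gcc regno 1),
   so for memcpy (dst, src, n) the operands land in %rdi, %rsi and %rdx.  */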

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};

/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,               /* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,       /* fp regs */
  -1, -1, -1, -1, -1,                   /* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,       /* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,       /* MMX */
  8,9,10,11,12,13,14,15,                /* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,       /* extended SSE registers */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
        0 for %eax (gcc regno = 0)
        1 for %ecx (gcc regno = 2)
        2 for %edx (gcc regno = 1)
        3 for %ebx (gcc regno = 3)
        4 for %esp (gcc regno = 7)
        5 for %ebp (gcc regno = 6)
        6 for %esi (gcc regno = 4)
        7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
        8  for %eip    (no gcc equivalent)
        9  for %eflags (gcc regno = 17)
        10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
        11 for %st(0) (gcc regno = 8)
        12 for %st(1) (gcc regno = 9)
        13 for %st(2) (gcc regno = 10)
        14 for %st(3) (gcc regno = 11)
        15 for %st(4) (gcc regno = 12)
        16 for %st(5) (gcc regno = 13)
        17 for %st(6) (gcc regno = 14)
        18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,               /* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,       /* fp regs */
  -1, 9, -1, -1, -1,                    /* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,       /* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,       /* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended SSE registers */
};
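
/* Putting the two tables together: a variable living in %ecx (gcc regno 2)
   is described to the debugger as DWARF register 1, and one living in
   %st(1) (gcc regno 9) as the stack-top-relative DWARF register 12.  */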

/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
rtx ix86_compare_emitted = NULL_RTX;

/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
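
/* With the 64-bit values REGPARM_MAX = 6, UNITS_PER_WORD = 8 and
   SSE_REGPARM_MAX = 8, this works out to 6*8 + 8*16 = 176 bytes, the
   size of the SysV x86-64 register save area.  */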

/* Define the structure for the machine field in struct function.  */

struct stack_local_entry GTY(())
{
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};

/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
                                              <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
                                              <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
                        )
   [va_arg registers]  (
                        > to_allocate         <- FRAME_POINTER
   [frame]             (
                        )
   [padding2]          /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};

/* Code model option.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialects.  */
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which CPU we are scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if the SSE prefetch instruction is not a NOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
static int ix86_regparm;

/* -mstackrealign option */
extern int ix86_force_align_arg_pointer;
static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;

/* Variables which are this size or smaller are put in the data/bss
   or ldata/lbss sections.  */

int ix86_section_threshold = 65536;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;

static bool ix86_handle_option (size_t, const char *, int);
static void output_pic_addr_const (FILE *, rtx, int);
static void put_condition_code (enum rtx_code, enum machine_mode,
                                int, int, FILE *);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
                                                   rtx *);
static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
                                                   enum machine_mode);
static rtx get_thread_pointer (int);
static rtx legitimize_tls_address (rtx, enum tls_model, int);
static void get_pc_thunk_name (char [32], unsigned int);
static rtx gen_push (rtx);
static int ix86_flags_dependent (rtx, rtx, enum attr_type);
static int ix86_agi_dependent (rtx, rtx, enum attr_type);
static struct machine_function * ix86_init_machine_status (void);
static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
static int ix86_nsaved_regs (void);
static void ix86_emit_save_regs (void);
static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
static HOST_WIDE_INT ix86_GOT_alias_set (void);
static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
static rtx ix86_expand_aligntest (rtx, int);
static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
static int ix86_issue_rate (void);
static int ix86_adjust_cost (rtx, rtx, rtx, int);
static int ia32_multipass_dfa_lookahead (void);
static void ix86_init_mmx_sse_builtins (void);
static rtx x86_this_parameter (tree);
static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                 HOST_WIDE_INT, tree);
static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void x86_file_start (void);
static void ix86_reorg (void);
static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
static tree ix86_build_builtin_va_list (void);
static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
                                         tree, int *, int);
static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
static bool ix86_scalar_mode_supported_p (enum machine_mode);
static bool ix86_vector_mode_supported_p (enum machine_mode);

static int ix86_address_cost (rtx);
static bool ix86_cannot_force_const_mem (rtx);
static rtx ix86_delegitimize_address (rtx);

static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;

struct builtin_description;
static rtx ix86_expand_sse_comi (const struct builtin_description *,
                                 tree, rtx);
static rtx ix86_expand_sse_compare (const struct builtin_description *,
                                    tree, rtx);
static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_store_builtin (enum insn_code, tree);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
static int ix86_fp_comparison_cost (enum rtx_code code);
static unsigned int ix86_select_alt_pic_regnum (void);
static int ix86_save_reg (unsigned int, int);
static void ix86_compute_frame_layout (struct ix86_frame *);
static int ix86_comp_type_attributes (tree, tree);
static int ix86_function_regparm (tree, tree);
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall (tree, tree);
static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
static int ix86_value_regno (enum machine_mode, tree, tree);
static bool contains_128bit_aligned_vector_p (tree);
static rtx ix86_struct_value_rtx (tree, int);
static bool ix86_ms_bitfield_layout_p (tree);
static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
static int extended_reg_mentioned_1 (rtx *, void *);
static bool ix86_rtx_costs (rtx, int, int, int *);
static int min_insn_size (rtx);
static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
                                    tree, bool);
static void ix86_init_builtins (void);
static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static const char *ix86_mangle_fundamental_type (tree);
static tree ix86_stack_protect_fail (void);
static rtx ix86_internal_arg_pointer (void);
static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);

/* This function is only used on Solaris.  */
static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
  ATTRIBUTE_UNUSED;

/* Register class used for passing a given 64-bit part of the argument.
   These represent classes as documented by the psABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class; gcc
   just uses an SF or DFmode move instead of DImode to avoid reformatting
   penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (the upper half then contains only padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] = {
  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
  "sseup", "x87", "x87up", "cplx87", "no"
};
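
/* For example, under this classification a structure such as
   struct { double d; int i; } spans two eightbytes that classify as
   X86_64_SSEDF_CLASS and X86_64_INTEGERSI_CLASS, so it travels in one
   SSE register and one general-purpose register.  */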

#define MAX_CLASSES 4

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;
static void init_ext_80387_constants (void);
static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
static section *x86_64_elf_select_section (tree decl, int reloc,
                                           unsigned HOST_WIDE_INT align)
                                             ATTRIBUTE_UNUSED;

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS     \
  (TARGET_DEFAULT                       \
   | TARGET_64BIT_DEFAULT               \
   | TARGET_SUBTARGET_DEFAULT           \
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)

#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_FUNDAMENTAL_TYPE
#define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

struct gcc_target targetm = TARGET_INITIALIZER;


/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif

/* Implement TARGET_HANDLE_OPTION.  */

static bool
ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
{
  switch (code)
    {
    case OPT_m3dnow:
      if (!value)
        {
          target_flags &= ~MASK_3DNOW_A;
          target_flags_explicit |= MASK_3DNOW_A;
        }
      return true;

    case OPT_mmmx:
      if (!value)
        {
          target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
          target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
        }
      return true;

    case OPT_msse:
      if (!value)
        {
          target_flags &= ~(MASK_SSE2 | MASK_SSE3);
          target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
        }
      return true;

    case OPT_msse2:
      if (!value)
        {
          target_flags &= ~MASK_SSE3;
          target_flags_explicit |= MASK_SSE3;
        }
      return true;

    default:
      return true;
    }
}
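
/* Note that the cascade above only runs one way: a negative option also
   drops the features built on top of it, so e.g. "-msse3 -mno-sse2" leaves
   both MASK_SSE2 and MASK_SSE3 cleared, while enabling a feature never
   switches its prerequisites on here (override_options does that later).  */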

/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options (void)
{
  int i;
  int ix86_tune_defaulted = 0;

  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;       /* Processor costs */
      const int target_enable;                  /* Target flags to enable.  */
      const int target_disable;                 /* Target flags to disable.  */
      const int align_loop;                     /* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
      {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
      {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
      {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
      {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
      {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
    };

  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  static struct pta
    {
      const char *const name;           /* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
        {
          PTA_SSE = 1,
          PTA_SSE2 = 2,
          PTA_SSE3 = 4,
          PTA_MMX = 8,
          PTA_PREFETCH_SSE = 16,
          PTA_3DNOW = 32,
          PTA_3DNOW_A = 64,
          PTA_64BIT = 128
        } flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
                                       | PTA_MMX | PTA_PREFETCH_SSE},
      {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
                                        | PTA_MMX | PTA_PREFETCH_SSE},
      {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
                                        | PTA_MMX | PTA_PREFETCH_SSE},
      {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
                                        | PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
                                   | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
                                         | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
                                    | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
                                      | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
                                      | PTA_3DNOW_A | PTA_SSE},
      {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
                               | PTA_SSE | PTA_SSE2 },
      {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
                                      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
                                      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
                                      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
                                      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch.  */ },
      {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch.  */ },
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
  SUBSUBTARGET_OVERRIDE_OPTIONS;
#endif

  /* -fPIC is the default for x86_64.  */
  if (TARGET_MACHO && TARGET_64BIT)
    flag_pic = 2;

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      /* Mach-O doesn't support omitting the frame pointer for now.  */
      if (flag_omit_frame_pointer == 2)
        flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
      if (flag_asynchronous_unwind_tables == 2)
        flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
        flag_pcc_struct_return = 0;
    }
  else
    {
      if (flag_omit_frame_pointer == 2)
        flag_omit_frame_pointer = 0;
      if (flag_asynchronous_unwind_tables == 2)
        flag_asynchronous_unwind_tables = 0;
      if (flag_pcc_struct_return == 2)
        flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }

  /* Need to check -mtune=generic first.  */
  if (ix86_tune_string)
    {
      if (!strcmp (ix86_tune_string, "generic")
          || !strcmp (ix86_tune_string, "i686")
          /* As special support for cross compilers we read -mtune=native
             as -mtune=generic.  With native compilers we won't see the
             -mtune=native, as it was changed by the driver.  */
          || !strcmp (ix86_tune_string, "native"))
        {
          if (TARGET_64BIT)
            ix86_tune_string = "generic64";
          else
            ix86_tune_string = "generic32";
        }
      else if (!strncmp (ix86_tune_string, "generic", 7))
        error ("bad value (%s) for -mtune= switch", ix86_tune_string);
    }
  else
    {
      if (ix86_arch_string)
        ix86_tune_string = ix86_arch_string;
      if (!ix86_tune_string)
        {
          ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
          ix86_tune_defaulted = 1;
        }

      /* ix86_tune_string is set to ix86_arch_string or defaulted.  We
         need to use a sensible tune option.  */
      if (!strcmp (ix86_tune_string, "generic")
          || !strcmp (ix86_tune_string, "x86-64")
          || !strcmp (ix86_tune_string, "i686"))
        {
          if (TARGET_64BIT)
            ix86_tune_string = "generic64";
          else
            ix86_tune_string = "generic32";
        }
    }
  if (!strcmp (ix86_tune_string, "x86-64"))
    warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated.  Use -mtune=k8 or "
             "-mtune=generic instead as appropriate.");

  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
  if (!strcmp (ix86_arch_string, "generic"))
    error ("generic CPU can be used only for -mtune= switch");
  if (!strncmp (ix86_arch_string, "generic", 7))
    error ("bad value (%s) for -march= switch", ix86_arch_string);

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
        ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (!strcmp (ix86_cmodel_string, "medium"))
        ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
      else if (flag_pic)
        sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
        ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
        ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
        ix86_cmodel = CM_LARGE;
      else
        error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
        ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (! TARGET_MACHO
          && !strcmp (ix86_asm_string, "intel"))
        ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
        ix86_asm_dialect = ASM_ATT;
      else
        error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model %qs not supported in the %s bit mode",
           ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model %<large%> not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
           (target_flags & MASK_64BIT) ? 64 : 32);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
        ix86_arch = processor_alias_table[i].processor;
        /* Default cpu tuning to the architecture.  */
        ix86_tune = ix86_arch;
        if (processor_alias_table[i].flags & PTA_MMX
            && !(target_flags_explicit & MASK_MMX))
          target_flags |= MASK_MMX;
        if (processor_alias_table[i].flags & PTA_3DNOW
            && !(target_flags_explicit & MASK_3DNOW))
          target_flags |= MASK_3DNOW;
        if (processor_alias_table[i].flags & PTA_3DNOW_A
            && !(target_flags_explicit & MASK_3DNOW_A))
          target_flags |= MASK_3DNOW_A;
        if (processor_alias_table[i].flags & PTA_SSE
            && !(target_flags_explicit & MASK_SSE))
          target_flags |= MASK_SSE;
        if (processor_alias_table[i].flags & PTA_SSE2
            && !(target_flags_explicit & MASK_SSE2))
          target_flags |= MASK_SSE2;
        if (processor_alias_table[i].flags & PTA_SSE3
            && !(target_flags_explicit & MASK_SSE3))
          target_flags |= MASK_SSE3;
        if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
          x86_prefetch_sse = true;
        if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
          error ("CPU you selected does not support x86-64 "
                 "instruction set");
        break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
        ix86_tune = processor_alias_table[i].processor;
        if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
          {
            if (ix86_tune_defaulted)
              {
                ix86_tune_string = "x86-64";
                for (i = 0; i < pta_size; i++)
                  if (! strcmp (ix86_tune_string,
                                processor_alias_table[i].name))
                    break;
                ix86_tune = processor_alias_table[i].processor;
              }
            else
              error ("CPU you selected does not support x86-64 "
                     "instruction set");
          }
        /* Intel CPUs have always interpreted SSE prefetch instructions as
           NOPs; so, we can enable SSE prefetch instructions even when
           -mtune (rather than -march) points us to a processor that has them.
           However, the VIA C3 gives a SIGILL, so we only do that for i686 and
           higher processors.  */
        if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
          x86_prefetch_sse = true;
        break;
      }
  if (i == pta_size)
    error ("bad value (%s) for -mtune= switch", ix86_tune_string);

  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;
  target_flags |= processor_target_table[ix86_tune].target_enable;
  target_flags &= ~processor_target_table[ix86_tune].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
        error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
        ix86_regparm = i;
    }
  else
   if (TARGET_64BIT)
     ix86_regparm = REGPARM_MAX;

  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning (0, "-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
        {
          i = atoi (ix86_align_loops_string);
          if (i < 0 || i > MAX_CODE_ALIGN)
            error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
          else
            align_loops = 1 << i;
        }
    }

  if (ix86_align_jumps_string)
    {
      warning (0, "-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
        {
          i = atoi (ix86_align_jumps_string);
          if (i < 0 || i > MAX_CODE_ALIGN)
            error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
          else
            align_jumps = 1 << i;
        }
    }

  if (ix86_align_funcs_string)
    {
      warning (0, "-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
        {
          i = atoi (ix86_align_funcs_string);
          if (i < 0 || i > MAX_CODE_ALIGN)
            error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
          else
            align_functions = 1 << i;
        }
    }
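
  /* The -malign-* values are log2s, so e.g. -malign-functions=4 yields
     align_functions = 1 << 4, i.e. 16-byte alignment.  */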

  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_tune].align_func;
    }

  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = ix86_cost->branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
        error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
        ix86_branch_cost = i;
    }
  if (ix86_section_threshold_string)
    {
      i = atoi (ix86_section_threshold_string);
      if (i < 0)
        error ("-mlarge-data-threshold=%d is negative", i);
      else
        ix86_section_threshold = i;
    }

  if (ix86_tls_dialect_string)
    {
      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
        ix86_tls_dialect = TLS_DIALECT_GNU;
      else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
        ix86_tls_dialect = TLS_DIALECT_GNU2;
      else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
        ix86_tls_dialect = TLS_DIALECT_SUN;
      else
        error ("bad value (%s) for -mtls-dialect= switch",
               ix86_tls_dialect_string);
    }

  /* Keep nonleaf frame pointers.  */
  if (flag_omit_frame_pointer)
    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_finite_math_only)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  /* Likewise, if the target doesn't have a 387, or we've specified
     software floating point, don't use 387 inline intrinsics.  */
  if (!TARGET_80387)
    target_flags |= MASK_NO_FANCY_MATH_387;

  /* Turn on SSE2 builtins for -msse3.  */
  if (TARGET_SSE3)
    target_flags |= MASK_SSE2;

  /* Turn on SSE builtins for -msse2.  */
  if (TARGET_SSE2)
    target_flags |= MASK_SSE;

  /* Turn on MMX builtins for -msse.  */
  if (TARGET_SSE)
    {
      target_flags |= MASK_MMX & ~target_flags_explicit;
      x86_prefetch_sse = true;
    }

  /* Turn on MMX builtins for 3Dnow.  */
  if (TARGET_3DNOW)
    target_flags |= MASK_MMX;

  if (TARGET_64BIT)
    {
      if (TARGET_ALIGN_DOUBLE)
        error ("-malign-double makes no sense in the 64bit mode");
      if (TARGET_RTD)
        error ("-mrtd calling convention not supported in the 64bit mode");

      /* Enable by default the SSE and MMX builtins.  Do allow the user to
         explicitly disable any of these.  In particular, disabling SSE and
         MMX for kernel code is extremely useful.  */
      target_flags
        |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
            & ~target_flags_explicit);
     }
  else
    {
      /* The i386 ABI does not specify a red zone.  It still makes sense to
         use one when the programmer takes care to keep the stack from being
         destroyed.  */
      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
        target_flags |= MASK_NO_RED_ZONE;
    }

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128.  We can't
     change it because of optimize_size.  Otherwise, we can't mix object
     files compiled with -Os and -On.  */
  ix86_preferred_stack_boundary = 128;
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
        error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
               TARGET_64BIT ? 4 : 2);
      else
        ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }
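
  /* This is again a log2 of bytes: the 128-bit default above corresponds
     to -mpreferred-stack-boundary=4, since (1 << 4) * BITS_PER_UNIT = 128
     bits (16 bytes), the alignment an SSE __m128 value requires.  */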

  /* Accept -msseregparm only if at least SSE support is enabled.  */
  if (TARGET_SSEREGPARM
      && ! TARGET_SSE)
    error ("-msseregparm used without SSE enabled");

  ix86_fpmath = TARGET_FPMATH_DEFAULT;

  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
        ix86_fpmath = FPMATH_387;
      else if (! strcmp (ix86_fpmath_string, "sse"))
        {
          if (!TARGET_SSE)
            {
              warning (0, "SSE instruction set disabled, using 387 arithmetics");
              ix86_fpmath = FPMATH_387;
            }
          else
            ix86_fpmath = FPMATH_SSE;
        }
      else if (! strcmp (ix86_fpmath_string, "387,sse")
               || ! strcmp (ix86_fpmath_string, "sse,387"))
        {
          if (!TARGET_SSE)
            {
              warning (0, "SSE instruction set disabled, using 387 arithmetics");
              ix86_fpmath = FPMATH_387;
            }
          else if (!TARGET_80387)
            {
              warning (0, "387 instruction set disabled, using SSE arithmetics");
              ix86_fpmath = FPMATH_SSE;
            }
          else
            ix86_fpmath = FPMATH_SSE | FPMATH_387;
        }
      else
        error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
    }

  /* If the i387 is disabled, then do not return values in it.  */
  if (!TARGET_80387)
    target_flags &= ~MASK_FLOAT_RETURNS;

  if ((x86_accumulate_outgoing_args & TUNEMASK)
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* ??? Unwind info is not correct around the CFG unless either a frame
     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
     unwind info generation to be aware of the CFG and propagating states
     around edges.  */
  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
       || flag_exceptions || flag_non_call_exceptions)
      && flag_omit_frame_pointer
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
        warning (0, "unwind tables currently require either a frame pointer "
                 "or -maccumulate-outgoing-args for correctness");
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }

  /* When no scheduling description is available, disable the scheduler
     pass so it won't slow down compilation and make x87 code slower.  */
  if (!TARGET_SCHEDULE)
    flag_schedule_insns_after_reload = flag_schedule_insns = 0;
}

/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  */

static section *
x86_64_elf_select_section (tree decl, int reloc,
                           unsigned HOST_WIDE_INT align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE;
      switch (categorize_decl_for_section (decl, reloc))
        {
        case SECCAT_DATA:
          sname = ".ldata";
          break;
        case SECCAT_DATA_REL:
          sname = ".ldata.rel";
          break;
        case SECCAT_DATA_REL_LOCAL:
          sname = ".ldata.rel.local";
          break;
        case SECCAT_DATA_REL_RO:
          sname = ".ldata.rel.ro";
          break;
        case SECCAT_DATA_REL_RO_LOCAL:
          sname = ".ldata.rel.ro.local";
          break;
        case SECCAT_BSS:
          sname = ".lbss";
          flags |= SECTION_BSS;
          break;
        case SECCAT_RODATA:
        case SECCAT_RODATA_MERGE_STR:
        case SECCAT_RODATA_MERGE_STR_INIT:
        case SECCAT_RODATA_MERGE_CONST:
          sname = ".lrodata";
          flags = 0;
          break;
        case SECCAT_SRODATA:
        case SECCAT_SDATA:
        case SECCAT_SBSS:
          gcc_unreachable ();
        case SECCAT_TEXT:
        case SECCAT_TDATA:
        case SECCAT_TBSS:
          /* We don't split these for medium model.  Place them into
             default sections and hope for the best.  */
          break;
        }
      if (sname)
        {
          /* We might get called with string constants, but get_named_section
             doesn't like them as they are not DECLs.  Also, we need to set
             flags in that case.  */
          if (!DECL_P (decl))
            return get_section (sname, flags, NULL);
          return get_named_section (decl, sname, reloc);
        }
    }
  return default_elf_select_section (decl, reloc, align);
}

/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */

static void
x86_64_elf_unique_section (tree decl, int reloc)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
        {
        case SECCAT_DATA:
        case SECCAT_DATA_REL:
        case SECCAT_DATA_REL_LOCAL:
        case SECCAT_DATA_REL_RO:
        case SECCAT_DATA_REL_RO_LOCAL:
          prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
          break;
        case SECCAT_BSS:
          prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
          break;
        case SECCAT_RODATA:
        case SECCAT_RODATA_MERGE_STR:
        case SECCAT_RODATA_MERGE_STR_INIT:
        case SECCAT_RODATA_MERGE_CONST:
          prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
          break;
        case SECCAT_SRODATA:
        case SECCAT_SDATA:
        case SECCAT_SBSS:
          gcc_unreachable ();
        case SECCAT_TEXT:
        case SECCAT_TDATA:
        case SECCAT_TBSS:
          /* We don't split these for medium model.  Place them into
             default sections and hope for the best.  */
          break;
        }
      if (prefix)
        {
          const char *name;
          size_t nlen, plen;
          char *string;
          plen = strlen (prefix);

          name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
          name = targetm.strip_name_encoding (name);
          nlen = strlen (name);

          string = alloca (nlen + plen + 1);
          memcpy (string, prefix, plen);
          memcpy (string + plen, name, nlen + 1);

          DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
          return;
        }
    }
  default_unique_section (decl, reloc);
}

#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For the medium model on x86-64 we need to use the .largecomm pseudo-op
   for large objects.  */
void
x86_elf_aligned_common (FILE *file,
                        const char *name, unsigned HOST_WIDE_INT size,
                        int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fprintf (file, ".largecomm\t");
  else
    fprintf (file, "%s", COMMON_ASM_OP);
  assemble_name (file, name);
  fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
           size, align / BITS_PER_UNIT);
}
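
/* For instance, a 200000-byte object ("obj" below is just a placeholder
   name) with 32-byte alignment compiled with -mcmodel=medium and the
   default -mlarge-data-threshold would come out roughly as:
       .largecomm obj,200000,32  */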

/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
                        const char *name, unsigned HOST_WIDE_INT size,
                        int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* The standard thing is just to output a label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
#endif

void
optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem of too few registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif

  if (TARGET_MACHO)
    /* The Darwin libraries never set errno, so we might as well
       avoid calling them when that's the only reason we would.  */
    flag_errno_math = 0;

  /* The default values of these switches depend on TARGET_64BIT,
     which is not known at this point.  Mark these values with 2 and
     let the user override them.  If no command line option specifies
     them, we will set the defaults in override_options.  */
  if (optimize >= 1)
    flag_omit_frame_pointer = 2;
  flag_pcc_struct_return = 2;
  flag_asynchronous_unwind_tables = 2;
#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
  SUBTARGET_OPTIMIZATION_OPTIONS;
#endif
}

/* Table of valid machine attributes.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Cdecl attribute says the callee is a normal C declaration.  */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
  /* force_align_arg_pointer says this function realigns the stack at entry.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true,  true, ix86_handle_cconv_attribute },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  { "ms_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL }
};
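
/* Illustrative only: these attributes are selected in user code with GNU
   attribute syntax, e.g.

     int __attribute__ ((fastcall)) f (int a, int b);
     int __attribute__ ((regparm (3))) g (int a, int b, int c);

   With fastcall the first two integer arguments travel in %ecx and %edx;
   with regparm (3) up to three integer arguments travel in %eax, %edx
   and %ecx.  */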

/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree func;
  rtx a, b;

  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx to be live.  */
  if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
    return false;

  if (decl)
    func = decl;
  else
    {
      func = TREE_TYPE (TREE_OPERAND (exp, 0));
      if (POINTER_TYPE_P (func))
        func = TREE_TYPE (func);
    }

  /* Check that the return value locations are the same.  For example,
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is OK for one
     of the functions to have a void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), func, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
                           cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
        return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    ;
  else if (!rtx_equal_p (a, b))
    return false;

  /* If this call is indirect, we'll need to be able to use a call-clobbered
     register for the address of the target function.  Make sure that no
     such register is used for passing parameters.  */
  if (!decl && !TARGET_64BIT)
    {
      tree type;

      /* We're looking at the CALL_EXPR; we need the type of the function.  */
      type = TREE_OPERAND (exp, 0);             /* pointer expression */
      type = TREE_TYPE (type);                  /* pointer type */
      type = TREE_TYPE (type);                  /* function type */

      if (ix86_function_regparm (type, NULL) >= 3)
        {
          /* ??? Need to count the actual number of registers to be used,
             not the possible number of registers.  Fix later.  */
          return false;
        }
    }

#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  /* Dllimport'd functions are also called indirectly.  */
  if (decl && DECL_DLLIMPORT_P (decl)
      && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
    return false;
#endif

  /* If we force-aligned the stack, then sibcalling would misalign the
     stack, which may break the called function.  */
  if (cfun->machine->force_align_arg_pointer)
    return false;

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
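
/* Illustrative only: a call in tail position such as

     extern int callee (int);
     int caller (int x) { return callee (x + 1); }

   is the kind of call this hook may approve, letting gcc emit a jmp to
   callee instead of a call/ret pair.  */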

/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
   calling convention attributes;
   arguments as in struct attribute_spec.handler.  */

static tree
ix86_handle_cconv_attribute (tree *node, tree name,
                             tree args,
                             int flags ATTRIBUTE_UNUSED,
                             bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qs attribute only applies to functions",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall.  */
  if (is_attribute_p ("regparm", name))
    {
      tree cst;

      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
        {
          error ("fastcall and regparm attributes are not compatible");
        }

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
        {
          warning (OPT_Wattributes,
                   "%qs attribute requires an integer constant argument",
                   IDENTIFIER_POINTER (name));
          *no_add_attrs = true;
        }
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
        {
          warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
                   IDENTIFIER_POINTER (name), REGPARM_MAX);
          *no_add_attrs = true;
        }

      if (!TARGET_64BIT
          && lookup_attribute (ix86_force_align_arg_pointer_string,
                               TYPE_ATTRIBUTES (*node))
          && compare_tree_int (cst, REGPARM_MAX-1))
        {
          error ("%s functions limited to %d register parameters",
                 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
        }

      return NULL_TREE;
    }

  if (TARGET_64BIT)
    {
      warning (OPT_Wattributes, "%qs attribute ignored",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine fastcall with sseregparm; combinations with cdecl,
     stdcall and regparm are rejected below.  */
  if (is_attribute_p ("fastcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
        {
          error ("fastcall and cdecl attributes are not compatible");
        }
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
        {
          error ("fastcall and stdcall attributes are not compatible");
        }
      if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
        {
          error ("fastcall and regparm attributes are not compatible");
        }
    }

  /* Can combine stdcall with regparm and sseregparm; combinations
     with cdecl and fastcall are rejected below.  */
  else if (is_attribute_p ("stdcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
        {
          error ("stdcall and cdecl attributes are not compatible");
        }
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
        {
          error ("stdcall and fastcall attributes are not compatible");
        }
    }

  /* Can combine cdecl with regparm and sseregparm.  */
  else if (is_attribute_p ("cdecl", name))
    {
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
        {
          error ("stdcall and cdecl attributes are not compatible");
        }
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
        {
          error ("fastcall and cdecl attributes are not compatible");
        }
    }

  /* Can combine sseregparm with all attributes.  */

  return NULL_TREE;
}

/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (tree type1, tree type2)
{
  /* Check for a mismatch of non-default calling conventions.  */
  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched fastcall/regparm types.  */
  if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
       != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
      || (ix86_function_regparm (type1, NULL)
          != ix86_function_regparm (type2, NULL)))
    return 0;

  /* Check for mismatched sseregparm types.  */
  if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
      != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
    return 0;

  /* Check for mismatched return types (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;

  return 1;
}

/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when the function is called indirectly
   or when considering a libcall.  */

static int
ix86_function_regparm (tree type, tree decl)
{
  tree attr;
  int regparm = ix86_regparm;
  bool user_convention = false;

  if (!TARGET_64BIT)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
        {
          regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
          user_convention = true;
        }

      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
        {
          regparm = 2;
          user_convention = true;
        }

      /* Use the register calling convention for local functions when
         possible.  */
      if (!TARGET_64BIT && !user_convention && decl
          && flag_unit_at_a_time && !profile_flag)
        {
          struct cgraph_local_info *i = cgraph_local_info (decl);
          if (i && i->local)
            {
              int local_regparm, globals = 0, regno;

              /* Make sure no regparm register is taken by a global register
                 variable.  */
              for (local_regparm = 0; local_regparm < 3; local_regparm++)
                if (global_regs[local_regparm])
                  break;
              /* We can't use regparm(3) for nested functions as these use
                 the static chain pointer in the third argument.  */
              if (local_regparm == 3
                  && decl_function_context (decl)
                  && !DECL_NO_STATIC_CHAIN (decl))
                local_regparm = 2;
              /* If the function realigns its stack pointer, the
                 prologue will clobber %ecx.  If we've already
                 generated code for the callee, the callee
                 DECL_STRUCT_FUNCTION is gone, so we fall back to
                 scanning the attributes for the self-realigning
                 property.  */
              if ((DECL_STRUCT_FUNCTION (decl)
                   && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
                  || (!DECL_STRUCT_FUNCTION (decl)
                      && lookup_attribute (ix86_force_align_arg_pointer_string,
                                           TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
                local_regparm = 2;
              /* Each global register variable increases register pressure,
                 so the more global reg vars there are, the less useful the
                 regparm optimization is, unless it was explicitly requested
                 by the user.  */
              for (regno = 0; regno < 6; regno++)
                if (global_regs[regno])
                  globals++;
              local_regparm
                = globals < local_regparm ? local_regparm - globals : 0;

              if (local_regparm > regparm)
                regparm = local_regparm;
            }
        }
    }
  return regparm;
}
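
/* Illustrative only: with -O2 -funit-at-a-time, a static function whose
   address never escapes (i->local above) can be promoted to an implicit
   register-passing convention with no attribute at all; each global
   register variable, e.g. "register int g asm ("ebx");", reduces the
   number of registers so used.  */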

/* Return 1 or 2 if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when the function is
   called indirectly or when considering a libcall.  Otherwise return 0.  */

static int
ix86_function_sseregparm (tree type, tree decl)
{
  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type
          && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
        {
          if (decl)
            error ("Calling %qD with attribute sseregparm without "
                   "SSE/SSE2 enabled", decl);
          else
            error ("Calling %qT with attribute sseregparm without "
                   "SSE/SSE2 enabled", type);
          return 0;
        }

      return 2;
    }

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers,
     even for 32-bit targets.  */
  if (!TARGET_64BIT && decl
      && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
    {
      struct cgraph_local_info *i = cgraph_local_info (decl);
      if (i && i->local)
        return TARGET_SSE2 ? 2 : 1;
    }

  return 0;
}

/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
}

/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
     of args is fixed, but if the number is variable then the caller
     must pop them all.  RTD can't be used for library calls now
     because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {

    /* Stdcall and fastcall functions will pop the stack if they do
       not take variable arguments.  */
    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
        || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
      rtd = 1;

    if (rtd
        && (TYPE_ARG_TYPES (funtype) == NULL_TREE
            || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
                == void_type_node)))
      return size;
  }

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !TARGET_64BIT
      && !KEEP_AGGREGATE_RETURN_POINTER)
    {
      int nregs = ix86_function_regparm (funtype, fundecl);

      if (!nregs)
        return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
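
/* Illustrative only: for

     void __attribute__ ((stdcall)) f (int a, int b);

   this returns 8, so the callee's return instruction becomes "ret $8"
   and pops both stack arguments; a cdecl function returns 0 here and
   the caller performs the cleanup.  */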

/* Argument support functions.  */

/* Return true when a register may be used to pass function parameters.  */
bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  if (!TARGET_64BIT)
    {
      if (TARGET_MACHO)
        return (regno < REGPARM_MAX
                || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
      else
        return (regno < REGPARM_MAX
                || (TARGET_MMX && MMX_REGNO_P (regno)
                    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
                || (TARGET_SSE && SSE_REGNO_P (regno)
                    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
    }

  if (TARGET_MACHO)
    {
      if (SSE_REGNO_P (regno) && TARGET_SSE)
        return true;
    }
  else
    {
      if (TARGET_SSE && SSE_REGNO_P (regno)
          && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
        return true;
    }
  /* RAX is used as a hidden argument to varargs functions.  */
  if (!regno)
    return true;
  for (i = 0; i < REGPARM_MAX; i++)
    if (regno == x86_64_int_parameter_registers[i])
      return true;
  return false;
}

/* Return true if we do not know how to pass TYPE solely in registers.  */

static bool
ix86_must_pass_in_stack (enum machine_mode mode, tree type)
{
  if (must_pass_in_stack_var_size_or_pad (mode, type))
    return true;

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && mode == TImode
          && type && TREE_CODE (type) != VECTOR_TYPE);
}

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
                      tree fntype,      /* tree ptr for function decl */
                      rtx libname,      /* SYMBOL_REF of library name or 0 */
                      tree fndecl)
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
        fprintf (stderr, "fntype code = %s, ret code = %s",
                 tree_code_name[(int) TREE_CODE (fntype)],
                 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
        fprintf (stderr, "no fntype");

      if (libname)
        fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  if (TARGET_SSE)
    cum->sse_nregs = SSE_REGPARM_MAX;
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_sse = true;
  cum->warn_mmx = true;
  cum->maybe_vaarg = false;

  /* Use the ecx and edx registers if the function has the fastcall
     attribute, else look for regparm information.  */
  if (fntype && !TARGET_64BIT)
    {
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
        {
          cum->nregs = 2;
          cum->fastcall = 1;
        }
      else
        cum->nregs = ix86_function_regparm (fntype, fndecl);
    }

  /* Set up the number of SSE registers used for passing SFmode
     and DFmode arguments.  Warn for a mismatching ABI.  */
  cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);

  /* Determine if this function has variable arguments.  The absence of
     variable arguments is indicated by the last argument entry being
     'void_type_node'.  If there are variable arguments, then we won't
     pass anything in registers in 32-bit mode.  */

  if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
           param != 0; param = next_param)
        {
          next_param = TREE_CHAIN (param);
          if (next_param == 0 && TREE_VALUE (param) != void_type_node)
            {
              if (!TARGET_64BIT)
                {
                  cum->nregs = 0;
                  cum->sse_nregs = 0;
                  cum->mmx_nregs = 0;
                  cum->warn_sse = 0;
                  cum->warn_mmx = 0;
                  cum->fastcall = 0;
                  cum->float_in_sse = 0;
                }
              cum->maybe_vaarg = true;
            }
        }
    }
  if ((!fntype && !libname)
      || (fntype && !TYPE_ARG_TYPES (fntype)))
    cum->maybe_vaarg = true;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}

/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector ISA extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.  */

static enum machine_mode
type_natural_mode (tree type)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16)
          /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
          && TYPE_VECTOR_SUBPARTS (type) > 1)
        {
          enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

          if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
            mode = MIN_MODE_VECTOR_FLOAT;
          else
            mode = MIN_MODE_VECTOR_INT;

          /* Get the mode which has this inner mode and number of units.  */
          for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
            if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
                && GET_MODE_INNER (mode) == innermode)
              return mode;

          gcc_unreachable ();
        }
    }

  return mode;
}
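
/* Illustrative only: given

     typedef int v4si __attribute__ ((vector_size (16)));

   TYPE_MODE may be a non-vector mode when SSE is disabled, but
   type_natural_mode still computes V4SImode, so v4si arguments keep
   their ABI-mandated locations regardless of the selected ISA.  */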

/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */

static rtx
gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
                     unsigned int regno)
{
  rtx tmp;

  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
  else
    {
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    }

  return tmp;
}

/* x86-64 register passing implementation.  See the x86-64 ABI for details.
   The goal of this code is to classify each eightbyte of the incoming
   argument by register class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is the X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
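
/* Illustrative only: for the 4-byte union

     union u { int i; float f; };

   both members occupy the same eightbyte; merging X86_64_INTEGERSI_CLASS
   (the int) with X86_64_SSESF_CLASS (the float) yields
   X86_64_INTEGERSI_CLASS by rule #4, so the union is passed in an
   integer register.  */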

/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies the
   offset in bits, modulo 256, to avoid overflow cases.

   See the x86-64 PS ABI for details.  */

static int
classify_argument (enum machine_mode mode, tree type,
                   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
{
  HOST_WIDE_INT bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Variable sized entities are always passed/returned in memory.  */
  if (bytes < 0)
    return 0;

  if (mode != VOIDmode
      && targetm.calls.must_pass_in_stack (mode, type))
    return 0;

  if (type && AGGREGATE_TYPE_P (type))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
      if (bytes > 16)
        return 0;

      for (i = 0; i < words; i++)
        classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
         signal the memory class, so handle this as a special case.  */
      if (!words)
        {
          classes[0] = X86_64_NO_CLASS;
          return 1;
        }

      /* Classify each field of the record and merge classes.  */
      switch (TREE_CODE (type))
        {
        case RECORD_TYPE:
          /* For C++ classes, first merge in the fields of the bases.  */
          if (TYPE_BINFO (type))
            {
              tree binfo, base_binfo;
              int basenum;

              for (binfo = TYPE_BINFO (type), basenum = 0;
                   BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
                {
                   int num;
                   int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
                   tree type = BINFO_TYPE (base_binfo);

                   num = classify_argument (TYPE_MODE (type),
                                            type, subclasses,
                                            (offset + bit_offset) % 256);
                   if (!num)
                     return 0;
                   for (i = 0; i < num; i++)
                     {
                       int pos = (offset + (bit_offset % 64)) / 8 / 8;
                       classes[i + pos] =
                         merge_classes (subclasses[i], classes[i + pos]);
                     }
                }
            }
          /* And now merge the fields of the structure.  */
          for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
            {
              if (TREE_CODE (field) == FIELD_DECL)
                {
                  int num;

                  if (TREE_TYPE (field) == error_mark_node)
                    continue;

                  /* Bitfields are always classified as integer.  Handle them
                     early, since later code would consider them to be
                     misaligned integers.  */
                  if (DECL_BIT_FIELD (field))
                    {
                      for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
                           i < ((int_bit_position (field) + (bit_offset % 64))
                                + tree_low_cst (DECL_SIZE (field), 0)
                                + 63) / 8 / 8; i++)
                        classes[i] =
                          merge_classes (X86_64_INTEGER_CLASS,
                                         classes[i]);
                    }
                  else
                    {
                      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
                                               TREE_TYPE (field), subclasses,
                                               (int_bit_position (field)
                                                + bit_offset) % 256);
                      if (!num)
                        return 0;
                      for (i = 0; i < num; i++)
                        {
                          int pos =
                            (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
                          classes[i + pos] =
                            merge_classes (subclasses[i], classes[i + pos]);
                        }
                    }
                }
            }
          break;

        case ARRAY_TYPE:
          /* Arrays are handled as small records.  */
          {
            int num;
            num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
                                     TREE_TYPE (type), subclasses, bit_offset);
            if (!num)
              return 0;

            /* The partial classes are now full classes.  */
            if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
              subclasses[0] = X86_64_SSE_CLASS;
            if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
              subclasses[0] = X86_64_INTEGER_CLASS;

            for (i = 0; i < words; i++)
              classes[i] = subclasses[i % num];

            break;
          }
        case UNION_TYPE:
        case QUAL_UNION_TYPE:
          /* Unions are similar to RECORD_TYPE but the offset is always 0.  */

          /* Unions are not derived.  */
          gcc_assert (!TYPE_BINFO (type)
                      || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
          for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
            {
              if (TREE_CODE (field) == FIELD_DECL)
                {
                  int num;

                  if (TREE_TYPE (field) == error_mark_node)
                    continue;

                  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
                                           TREE_TYPE (field), subclasses,
                                           bit_offset);
                  if (!num)
                    return 0;
                  for (i = 0; i < num; i++)
                    classes[i] = merge_classes (subclasses[i], classes[i]);
                }
            }
          break;

        default:
          gcc_unreachable ();
        }

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
        {
          /* If one class is MEMORY, everything should be passed in
             memory.  */
          if (classes[i] == X86_64_MEMORY_CLASS)
            return 0;

          /* The X86_64_SSEUP_CLASS should always be preceded by
             X86_64_SSE_CLASS.  */
          if (classes[i] == X86_64_SSEUP_CLASS
              && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
            classes[i] = X86_64_SSE_CLASS;

          /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
          if (classes[i] == X86_64_X87UP_CLASS
              && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
            classes[i] = X86_64_SSE_CLASS;
        }
      return words;
    }

  /* Compute the alignment needed.  We align all types to natural boundaries
     with the exception of XFmode, which is aligned to 64 bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
        mode_alignment = 128;
      else if (mode == XCmode)
        mode_alignment = 256;
      if (COMPLEX_MODE_P (mode))
        mode_alignment /= 2;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
        return 0;
    }

  /* For V1xx modes, just use the base mode.  */
  if (VECTOR_MODE_P (mode)
      && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
    mode = GET_MODE_INNER (mode);

  /* Classification of atomic types.  */
  switch (mode)
    {
    case SDmode:
    case DDmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case TDmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case DImode:
    case SImode:
    case HImode:
    case QImode:
    case CSImode:
    case CHImode:
    case CQImode:
      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
        classes[0] = X86_64_INTEGERSI_CLASS;
      else
        classes[0] = X86_64_INTEGER_CLASS;
      return 1;
    case CDImode:
    case TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case CTImode:
      return 0;
    case SFmode:
      if (!(bit_offset % 64))
        classes[0] = X86_64_SSESF_CLASS;
      else
        classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case XFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case TFmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case SCmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case XCmode:
      classes[0] = X86_64_COMPLEX_X87_CLASS;
      return 1;
    case TCmode:
      /* This mode is larger than 16 bytes.  */
      return 0;
    case V4SFmode:
    case V4SImode:
    case V16QImode:
    case V8HImode:
    case V2DFmode:
    case V2DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case V2SFmode:
    case V2SImode:
    case V4HImode:
    case V8QImode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case BLKmode:
    case VOIDmode:
      return 0;
    default:
      gcc_assert (VECTOR_MODE_P (mode));

      if (bytes > 16)
        return 0;

      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);

      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
        classes[0] = X86_64_INTEGERSI_CLASS;
      else
        classes[0] = X86_64_INTEGER_CLASS;
      classes[1] = X86_64_INTEGER_CLASS;
      return 1 + (bytes > 8);
    }
}
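
/* Illustrative only: for the 16-byte

     struct s { double d; int i; };

   classify_argument returns 2 with classes[0] = X86_64_SSEDF_CLASS and
   classes[1] = X86_64_INTEGER_CLASS, so the struct travels in one SSE
   register and one integer register.  */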

/* Examine the argument and compute the number of registers required in
   each class.  Return 0 iff the parameter should be passed in memory.  */
static int
examine_argument (enum machine_mode mode, tree type, int in_return,
                  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class class[MAX_CLASSES];
  int n = classify_argument (mode, type, class, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  if (!n)
    return 0;
  for (n--; n >= 0; n--)
    switch (class[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
        (*int_nregs)++;
        break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
        (*sse_nregs)++;
        break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
        break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
        if (!in_return)
          return 0;
        break;
      case X86_64_COMPLEX_X87_CLASS:
        return in_return ? 2 : 0;
      case X86_64_MEMORY_CLASS:
        gcc_unreachable ();
      }
  return 1;
}
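
/* Illustrative only: a long double classifies as
   { X86_64_X87_CLASS, X86_64_X87UP_CLASS }, so examine_argument returns
   0 for it as an argument (it must go in memory) but nonzero as a
   return value (it comes back in an x87 register).  */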

/* Construct the container for the argument used by the GCC interface.
   See FUNCTION_ARG for the detailed description.  */

static rtx
construct_container (enum machine_mode mode, enum machine_mode orig_mode,
                     tree type, int in_return, int nintregs, int nsseregs,
                     const int *intreg, int sse_regno)
{
  /* The following variables hold the static issued_error state.  */
  static bool issued_sse_arg_error;
  static bool issued_sse_ret_error;
  static bool issued_x87_ret_error;

  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class class[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, class, 0);
  if (TARGET_DEBUG_ARG)
    {
      if (!n)
        fprintf (stderr, "Memory class\n");
      else
        {
          fprintf (stderr, "Classes:");
          for (i = 0; i < n; i++)
            {
              fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
            }
          fprintf (stderr, "\n");
        }
    }
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs,
                         &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs && !TARGET_SSE)
    {
      if (in_return)
        {
          if (!issued_sse_ret_error)
            {
              error ("SSE register return with SSE disabled");
              issued_sse_ret_error = true;
            }
        }
      else if (!issued_sse_arg_error)
        {
          error ("SSE register argument with SSE disabled");
          issued_sse_arg_error = true;
        }
      return NULL;
    }

  /* Likewise, error if the ABI requires us to return values in the
     x87 registers and the user specified -mno-80387.  */
  if (!TARGET_80387 && in_return)
    for (i = 0; i < n; i++)
      if (class[i] == X86_64_X87_CLASS
          || class[i] == X86_64_X87UP_CLASS
          || class[i] == X86_64_COMPLEX_X87_CLASS)
        {
          if (!issued_x87_ret_error)
            {
              error ("x87 register return with x87 disabled");
              issued_x87_ret_error = true;
            }
          return NULL;
        }

  /* First construct simple cases.  Avoid SCmode, since we want to use a
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (class[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
        return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
        return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
        return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
        /* Zero sized array, struct or class.  */
        return NULL;
      default:
        gcc_unreachable ();
      }
  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
  if (n == 2
      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
      && class[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == TFmode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      switch (class[i])
        {
          case X86_64_NO_CLASS:
            break;
          case X86_64_INTEGER_CLASS:
          case X86_64_INTEGERSI_CLASS:
            /* Merge TImodes on aligned occasions here too.  */
            if (i * 8 + 8 > bytes)
              tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
            else if (class[i] == X86_64_INTEGERSI_CLASS)
              tmpmode = SImode;
            else
              tmpmode = DImode;
            /* We've requested 24 bytes we don't have a mode for.
               Use DImode.  */
            if (tmpmode == BLKmode)
              tmpmode = DImode;
            exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
                                               gen_rtx_REG (tmpmode, *intreg),
                                               GEN_INT (i*8));
            intreg++;
            break;
          case X86_64_SSESF_CLASS:
            exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
                                               gen_rtx_REG (SFmode,
                                                            SSE_REGNO (sse_regno)),
                                               GEN_INT (i*8));
            sse_regno++;
            break;
          case X86_64_SSEDF_CLASS:
            exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
                                               gen_rtx_REG (DFmode,
                                                            SSE_REGNO (sse_regno)),
                                               GEN_INT (i*8));
            sse_regno++;
            break;
          case X86_64_SSE_CLASS:
            if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
              tmpmode = TImode;
            else
              tmpmode = DImode;
            exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
                                               gen_rtx_REG (tmpmode,
                                                            SSE_REGNO (sse_regno)),
                                               GEN_INT (i*8));
            if (tmpmode == TImode)
              i++;
            sse_regno++;
            break;
          default:
            gcc_unreachable ();
        }
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];
  return ret;
}

/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
                      tree type, int named)
{
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (type)
    mode = type_natural_mode (type);

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
             "mode=%s, named=%d)\n\n",
             words, cum->words, cum->nregs, cum->sse_nregs,
             GET_MODE_NAME (mode), named);

  if (TARGET_64BIT)
    {
      int int_nregs, sse_nregs;
      if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
        cum->words += words;
      else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
        {
          cum->nregs -= int_nregs;
          cum->sse_nregs -= sse_nregs;
          cum->regno += int_nregs;
          cum->sse_regno += sse_nregs;
        }
      else
        cum->words += words;
    }
  else
    {
      switch (mode)
        {
        default:
          break;

        case BLKmode:
          if (bytes < 0)
            break;
          /* FALLTHRU */

        case DImode:
        case SImode:
        case HImode:
        case QImode:
          cum->words += words;
          cum->nregs -= words;
          cum->regno += words;

          if (cum->nregs <= 0)
            {
              cum->nregs = 0;
              cum->regno = 0;
            }
          break;

        case DFmode:
          if (cum->float_in_sse < 2)
            break;
        case SFmode:
          if (cum->float_in_sse < 1)
            break;
          /* FALLTHRU */

        case TImode:
        case V16QImode:
        case V8HImode:
        case V4SImode:
        case V2DImode:
        case V4SFmode:
        case V2DFmode:
          if (!type || !AGGREGATE_TYPE_P (type))
            {
              cum->sse_words += words;
              cum->sse_nregs -= 1;
              cum->sse_regno += 1;
              if (cum->sse_nregs <= 0)
                {
                  cum->sse_nregs = 0;
                  cum->sse_regno = 0;
                }
            }
          break;

        case V8QImode:
        case V4HImode:
        case V2SImode:
        case V2SFmode:
          if (!type || !AGGREGATE_TYPE_P (type))
            {
              cum->mmx_words += words;
              cum->mmx_nregs -= 1;
              cum->mmx_regno += 1;
              if (cum->mmx_nregs <= 0)
                {
                  cum->mmx_nregs = 0;
                  cum->mmx_regno = 0;
                }
            }
          break;
        }
    }
}

/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

rtx
function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
              tree type, int named)
{
  enum machine_mode mode = orig_mode;
  rtx ret = NULL_RTX;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  static bool warnedsse, warnedmmx;

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    mode = type_natural_mode (type);

  /* Handle the hidden AL argument containing the number of SSE registers
     used by varargs x86-64 functions.  For the i386 ABI just return
     constm1_rtx to avoid any AL settings.  */
  if (mode == VOIDmode)
    {
      if (TARGET_64BIT)
        return GEN_INT (cum->maybe_vaarg
                        ? (cum->sse_nregs < 0
                           ? SSE_REGPARM_MAX
                           : cum->sse_regno)
                        : -1);
      else
        return constm1_rtx;
    }
  if (TARGET_64BIT)
    ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
                               cum->sse_nregs,
                               &x86_64_int_parameter_registers [cum->regno],
                               cum->sse_regno);
  else
    switch (mode)
      {
        /* For now, pass fp/complex values on the stack.  */
      default:
        break;

      case BLKmode:
        if (bytes < 0)
          break;
        /* FALLTHRU */
      case DImode:
      case SImode:
      case HImode:
      case QImode:
        if (words <= cum->nregs)
          {
            int regno = cum->regno;

            /* Fastcall allocates the first two DWORD (SImode) or
               smaller arguments to ECX and EDX.  */
            if (cum->fastcall)
              {
                if (mode == BLKmode || mode == DImode)
                  break;

                /* ECX, not EAX, is the first allocated register.  */
                if (regno == 0)
                  regno = 2;
              }
            ret = gen_rtx_REG (mode, regno);
          }
        break;
      case DFmode:
        if (cum->float_in_sse < 2)
          break;
      case SFmode:
        if (cum->float_in_sse < 1)
          break;
        /* FALLTHRU */
      case TImode:
      case V16QImode:
      case V8HImode:
      case V4SImode:
      case V2DImode:
      case V4SFmode:
      case V2DFmode:
        if (!type || !AGGREGATE_TYPE_P (type))
          {
            if (!TARGET_SSE && !warnedsse && cum->warn_sse)
              {
                warnedsse = true;
                warning (0, "SSE vector argument without SSE enabled "
                         "changes the ABI");
              }
            if (cum->sse_nregs)
              ret = gen_reg_or_parallel (mode, orig_mode,
                                         cum->sse_regno + FIRST_SSE_REG);
          }
        break;
      case V8QImode:
      case V4HImode:
      case V2SImode:
      case V2SFmode:
        if (!type || !AGGREGATE_TYPE_P (type))
          {
            if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
              {
                warnedmmx = true;
                warning (0, "MMX vector argument without MMX enabled "
                         "changes the ABI");
              }
            if (cum->mmx_nregs)
              ret = gen_reg_or_parallel (mode, orig_mode,
                                         cum->mmx_regno + FIRST_MMX_REG);
          }
        break;
      }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
               "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
               words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
        print_simple_rtl (stderr, ret);
      else
        fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}
3703
 
/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

static bool
ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
                        enum machine_mode mode ATTRIBUTE_UNUSED,
                        tree type, bool named ATTRIBUTE_UNUSED)
{
  if (!TARGET_64BIT)
    return 0;

  if (type && int_size_in_bytes (type) == -1)
    {
      if (TARGET_DEBUG_ARG)
        fprintf (stderr, "function_arg_pass_by_reference\n");
      return 1;
    }

  return 0;
}

/* Return true when TYPE should be 128-bit aligned for the 32-bit argument
   passing ABI.  Only called if TARGET_SSE.  */
static bool
contains_128bit_aligned_vector_p (tree type)
{
  enum machine_mode mode = TYPE_MODE (type);
  if (SSE_REG_MODE_P (mode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
        {
        case RECORD_TYPE:
        case UNION_TYPE:
        case QUAL_UNION_TYPE:
          {
            tree field;

            if (TYPE_BINFO (type))
              {
                tree binfo, base_binfo;
                int i;

                for (binfo = TYPE_BINFO (type), i = 0;
                     BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
                  if (contains_128bit_aligned_vector_p
                      (BINFO_TYPE (base_binfo)))
                    return true;
              }
            /* And now walk the fields of the structure.  */
            for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
              {
                if (TREE_CODE (field) == FIELD_DECL
                    && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
                  return true;
              }
            break;
          }

        case ARRAY_TYPE:
          /* Just in case some languages pass arrays by value.  */
          if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
            return true;
          break;

        default:
          gcc_unreachable ();
        }
    }
  return false;
}

/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  */

int
ix86_function_arg_boundary (enum machine_mode mode, tree type)
{
  int align;
  if (type)
    align = TYPE_ALIGN (type);
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  if (!TARGET_64BIT)
    {
      /* The i386 ABI defines all arguments to be 4 byte aligned.  We have
         to make an exception for SSE modes since these require 128-bit
         alignment.

         The handling here differs from field_alignment.  ICC aligns MMX
         arguments to 4 byte boundaries, while structure fields are aligned
         to 8 byte boundaries.  */
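      /* For instance, with SSE enabled a __m128 argument ends up on a
         128-bit boundary, while a plain double stays at the 32-bit
         PARM_BOUNDARY.  */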
      if (!TARGET_SSE)
        align = PARM_BOUNDARY;
      else if (!type)
        {
          if (!SSE_REG_MODE_P (mode))
            align = PARM_BOUNDARY;
        }
      else
        {
          if (!contains_128bit_aligned_vector_p (type))
            align = PARM_BOUNDARY;
        }
    }
  if (align > 128)
    align = 128;
  return align;
}

/* Return true if N is a possible register number of function value.  */
bool
ix86_function_value_regno_p (int regno)
{
  if (TARGET_MACHO)
    {
      if (!TARGET_64BIT)
        {
          return ((regno) == 0
                  || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
                  || ((regno) == FIRST_SSE_REG && TARGET_SSE));
        }
      return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
              || ((regno) == FIRST_SSE_REG && TARGET_SSE)
              || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
    }
  else
    {
      if (regno == 0
          || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
          || (regno == FIRST_SSE_REG && TARGET_SSE))
        return true;

      if (!TARGET_64BIT
          && (regno == FIRST_MMX_REG && TARGET_MMX))
        return true;

      return false;
    }
}

/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */
rtx
ix86_function_value (tree valtype, tree fntype_or_decl,
                     bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode natmode = type_natural_mode (valtype);

  if (TARGET_64BIT)
    {
      rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
                                     1, REGPARM_MAX, SSE_REGPARM_MAX,
                                     x86_64_int_return_registers, 0);
      /* For zero-sized structures, construct_container returns NULL, but
         we need to keep the rest of the compiler happy by returning a
         meaningful value.  */
      if (!ret)
        ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
      return ret;
    }
  else
    {
      tree fn = NULL_TREE, fntype;
      if (fntype_or_decl
          && DECL_P (fntype_or_decl))
        fn = fntype_or_decl;
      fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
      return gen_rtx_REG (TYPE_MODE (valtype),
                          ix86_value_regno (natmode, fn, fntype));
    }
}

/* Return true iff type is returned in memory.  */
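/* For example, on ia32 a 16-byte struct is returned in memory (it is
   BLKmode, and in any case size > 12), while a __m128 value is returned
   in %xmm0 when SSE is enabled and in memory otherwise, per the size
   checks below.  */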
int
ix86_return_in_memory (tree type)
{
  int needed_intregs, needed_sseregs, size;
  enum machine_mode mode = type_natural_mode (type);

  if (TARGET_64BIT)
    return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);

  if (mode == BLKmode)
    return 1;

  size = int_size_in_bytes (type);

  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
    return 0;

  if (VECTOR_MODE_P (mode) || mode == TImode)
    {
      /* User-created vectors small enough to fit in EAX.  */
      if (size < 8)
        return 0;

      /* MMX/3dNow values are returned in MM0,
         except when it doesn't exist.  */
      if (size == 8)
        return (TARGET_MMX ? 0 : 1);

      /* SSE values are returned in XMM0, except when it doesn't exist.  */
      if (size == 16)
        return (TARGET_SSE ? 0 : 1);
    }

  if (mode == XFmode)
    return 0;

  if (mode == TDmode)
    return 1;

  if (size > 12)
    return 1;
  return 0;
}

/* When returning SSE vector types, we have a choice of either
     (1) being ABI incompatible with a -march switch, or
     (2) generating an error.
   Given no good solution, I think the safest thing is one warning.
   The user won't be able to use -Werror, but....

   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
   called in response to actually generating a caller or callee that
   uses such a type.  As opposed to RETURN_IN_MEMORY, which is called
   via aggregate_value_p for general type probing from tree-ssa.  */

static rtx
ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
{
  static bool warnedsse, warnedmmx;

  if (type)
    {
      /* Look at the return type of the function, not the function type.  */
      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));

      if (!TARGET_SSE && !warnedsse)
        {
          if (mode == TImode
              || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
            {
              warnedsse = true;
              warning (0, "SSE vector return without SSE enabled "
                       "changes the ABI");
            }
        }

      if (!TARGET_MMX && !warnedmmx)
        {
          if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
            {
              warnedmmx = true;
              warning (0, "MMX vector return without MMX enabled "
                       "changes the ABI");
            }
        }
    }

  return NULL;
}

/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */
rtx
ix86_libcall_value (enum machine_mode mode)
{
  if (TARGET_64BIT)
    {
      switch (mode)
        {
        case SFmode:
        case SCmode:
        case DFmode:
        case DCmode:
        case TFmode:
        case SDmode:
        case DDmode:
        case TDmode:
          return gen_rtx_REG (mode, FIRST_SSE_REG);
        case XFmode:
        case XCmode:
          return gen_rtx_REG (mode, FIRST_FLOAT_REG);
        case TCmode:
          return NULL;
        default:
          return gen_rtx_REG (mode, 0);
        }
    }
  else
    return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
}

/* Given a mode, return the register to use for a return value.  */

static int
ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
{
  gcc_assert (!TARGET_64BIT);

  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we normally prevent this case when MMX is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    return TARGET_MMX ? FIRST_MMX_REG : 0;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when SSE is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    return TARGET_SSE ? FIRST_SSE_REG : 0;

  /* Decimal floating point values can go in %eax, unlike other float modes.  */
  if (DECIMAL_FLOAT_MODE_P (mode))
    return 0;

  /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values.  */
  if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
    return 0;

  /* Floating point return values in %st(0), except for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((func || fntype)
      && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (fntype, func);
      if ((sse_level >= 1 && mode == SFmode)
          || (sse_level == 2 && mode == DFmode))
        return FIRST_SSE_REG;
    }

  return FIRST_FLOAT_REG;
}

/* Create the va_list data type.  */
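/* On x86-64 the record built below matches the layout mandated by the
   psABI and is equivalent to:

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag[1];  */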

static tree
ix86_build_builtin_va_list (void)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.  */
  if (!TARGET_64BIT)
    return build_pointer_type (char_type_node);

  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
                      unsigned_type_node);
  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
                      unsigned_type_node);
  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
                      ptr_type_node);
  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
                      ptr_type_node);

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TREE_CHAIN (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  TREE_CHAIN (f_gpr) = f_fpr;
  TREE_CHAIN (f_fpr) = f_ovf;
  TREE_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}

/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */

static void
ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
                             tree type, int *pretend_size ATTRIBUTE_UNUSED,
                             int no_rtl)
{
  CUMULATIVE_ARGS next_cum;
  rtx save_area = NULL_RTX, mem;
  rtx label;
  rtx label_ref;
  rtx tmp_reg;
  rtx nsse_reg;
  int set;
  tree fntype;
  int stdarg_p;
  int i;

  if (!TARGET_64BIT)
    return;

  if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
    return;

  /* Indicate to allocate space on the stack for varargs save area.  */
  ix86_save_varrargs_registers = 1;

  cfun->stack_alignment_needed = 128;

  fntype = TREE_TYPE (current_function_decl);
  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
              && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
                  != void_type_node));

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p)
    function_arg_advance (&next_cum, mode, type, 1);

  if (!no_rtl)
    save_area = frame_pointer_rtx;

  set = get_varargs_alias_set ();

  for (i = next_cum.regno;
       i < ix86_regparm
       && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
       i++)
    {
      mem = gen_rtx_MEM (Pmode,
                         plus_constant (save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem, gen_rtx_REG (Pmode,
                                        x86_64_int_parameter_registers[i]));
    }

  if (next_cum.sse_nregs && cfun->va_list_fpr_size)
    {
      /* Now emit code to save SSE registers.  The AL register contains the
         number of SSE registers used to call this function.  We use the
         sse_prologue_save insn template that produces a computed jump
         across the SSE saves.  We need some preparation work to get this
         working.  */

      label = gen_label_rtx ();
      label_ref = gen_rtx_LABEL_REF (Pmode, label);

      /* Compute address to jump to:
         label - 4*eax + nnamed_sse_arguments*4  */
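      /* Each register save emitted by the sse_prologue_save template is
         4 bytes long, so scaling both register counts by 4 converts them
         into byte offsets into the save sequence.  */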
      tmp_reg = gen_reg_rtx (Pmode);
      nsse_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
                              gen_rtx_MULT (Pmode, nsse_reg,
                                            GEN_INT (4))));
      if (next_cum.sse_regno)
        emit_move_insn
          (nsse_reg,
           gen_rtx_CONST (DImode,
                          gen_rtx_PLUS (DImode,
                                        label_ref,
                                        GEN_INT (next_cum.sse_regno * 4))));
      else
        emit_move_insn (nsse_reg, label_ref);
      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));

      /* Compute address of memory block we save into.  We always use a
         pointer pointing 127 bytes after the first byte to store - this
         keeps the displacements within signed 8-bit range and so limits
         each instruction to 4 bytes.  */
      tmp_reg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
                              plus_constant (save_area,
                                             8 * REGPARM_MAX + 127)));
      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      set_mem_align (mem, BITS_PER_WORD);

      /* And finally do the dirty job!  */
      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
                                        GEN_INT (next_cum.sse_regno), label));
    }

}

/* Implement va_start.  */

void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;

  /* Only the 64-bit target needs something special.  */
  if (!TARGET_64BIT)
    {
      std_expand_builtin_va_start (valist, nextarg);
      return;
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = current_function_args_info.words;
  n_gpr = current_function_args_info.regno;
  n_fpr = current_function_args_info.sse_regno;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
             (int) words, (int) n_gpr, (int) n_fpr);

  if (cfun->va_list_gpr_size)
    {
      type = TREE_TYPE (gpr);
      t = build2 (MODIFY_EXPR, type, gpr,
                  build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (cfun->va_list_fpr_size)
    {
      type = TREE_TYPE (fpr);
      t = build2 (MODIFY_EXPR, type, fpr,
                  build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  t = make_tree (type, virtual_incoming_args_rtx);
  if (words != 0)
    t = build2 (PLUS_EXPR, type, t,
                build_int_cst (type, words * UNITS_PER_WORD));
  t = build2 (MODIFY_EXPR, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
    {
      /* Find the register save area.
         The function prologue saves it right above the stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      t = build2 (MODIFY_EXPR, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}

/* Implement va_arg.  */

tree
ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  enum machine_mode nat_mode;

  /* Only the 64-bit target needs something special.  */
  if (!TARGET_64BIT)
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  valist = build_va_arg_indirect_ref (valist);
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  nat_mode = type_natural_mode (type);
  container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
                                   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");
  DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label ();
      lab_over = create_artificial_label ();

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      need_temp = (!REG_P (container)
                   && ((needed_intregs && TYPE_ALIGN (type) > 64)
                       || TYPE_ALIGN (type) > 128));

      /* If we are passing a structure, verify that it occupies a
         consecutive block of the register save area; if not, we need to
         do moves.  */
      if (!need_temp && !REG_P (container))
        {
          /* Verify that all registers are strictly consecutive.  */
          if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
            {
              int i;

              for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
                {
                  rtx slot = XVECEXP (container, 0, i);
                  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
                      || INTVAL (XEXP (slot, 1)) != i * 16)
                    need_temp = 1;
                }
            }
          else
            {
              int i;

              for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
                {
                  rtx slot = XVECEXP (container, 0, i);
                  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
                      || INTVAL (XEXP (slot, 1)) != i * 8)
                    need_temp = 1;
                }
            }
        }
      if (!need_temp)
        {
          int_addr = addr;
          sse_addr = addr;
        }
      else
        {
          int_addr = create_tmp_var (ptr_type_node, "int_addr");
          DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
          sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
          DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
        }

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
        {
          t = build_int_cst (TREE_TYPE (gpr),
                             (REGPARM_MAX - needed_intregs + 1) * 8);
          t = build2 (GE_EXPR, boolean_type_node, gpr, t);
          t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
          t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
          gimplify_and_add (t, pre_p);
        }
      if (needed_sseregs)
        {
          t = build_int_cst (TREE_TYPE (fpr),
                             (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
                             + REGPARM_MAX * 8);
          t = build2 (GE_EXPR, boolean_type_node, fpr, t);
          t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
          t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
          gimplify_and_add (t, pre_p);
        }

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
        {
          /* int_addr = gpr + sav; */
          t = fold_convert (ptr_type_node, gpr);
          t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
          t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
          gimplify_and_add (t, pre_p);
        }
      if (needed_sseregs)
        {
          /* sse_addr = fpr + sav; */
          t = fold_convert (ptr_type_node, fpr);
          t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
          t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
          gimplify_and_add (t, pre_p);
        }
      if (need_temp)
        {
          int i;
          tree temp = create_tmp_var (type, "va_arg_tmp");

          /* addr = &temp; */
          t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
          t = build2 (MODIFY_EXPR, void_type_node, addr, t);
          gimplify_and_add (t, pre_p);

          for (i = 0; i < XVECLEN (container, 0); i++)
            {
              rtx slot = XVECEXP (container, 0, i);
              rtx reg = XEXP (slot, 0);
              enum machine_mode mode = GET_MODE (reg);
              tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
              tree addr_type = build_pointer_type (piece_type);
              tree src_addr, src;
              int src_offset;
              tree dest_addr, dest;

              if (SSE_REGNO_P (REGNO (reg)))
                {
                  src_addr = sse_addr;
                  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
                }
              else
                {
                  src_addr = int_addr;
                  src_offset = REGNO (reg) * 8;
                }
              src_addr = fold_convert (addr_type, src_addr);
              src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
                                       size_int (src_offset)));
              src = build_va_arg_indirect_ref (src_addr);

              dest_addr = fold_convert (addr_type, addr);
              dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
                                        size_int (INTVAL (XEXP (slot, 1)))));
              dest = build_va_arg_indirect_ref (dest_addr);

              t = build2 (MODIFY_EXPR, void_type_node, dest, src);
              gimplify_and_add (t, pre_p);
            }
        }

      if (needed_intregs)
        {
          t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
                      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
          t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
          gimplify_and_add (t, pre_p);
        }
      if (needed_sseregs)
        {
          t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
                      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
          t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
          gimplify_and_add (t, pre_p);
        }

      t = build1 (GOTO_EXPR, void_type_node, lab_over);
      gimplify_and_add (t, pre_p);

      t = build1 (LABEL_EXPR, void_type_node, lab_false);
      append_to_statement_list (t, pre_p);
    }

  /* ... otherwise out of the overflow area.  */

  /* Care for on-stack alignment if needed.  */
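  /* The expression built below rounds the overflow-area pointer up to
     the argument's alignment, e.g. for a 16-byte boundary:
     t = (ovf + 15) & -16.  */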
  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
      || integer_zerop (TYPE_SIZE (type)))
    t = ovf;
  else
    {
      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
      t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
                  build_int_cst (TREE_TYPE (ovf), align - 1));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
                  build_int_cst (TREE_TYPE (t), -align));
    }
  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);

  t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
  gimplify_and_add (t2, pre_p);

  t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
              build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
  t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  gimplify_and_add (t, pre_p);

  if (container)
    {
      t = build1 (LABEL_EXPR, void_type_node, lab_over);
      append_to_statement_list (t, pre_p);
    }

  ptrtype = build_pointer_type (type);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}

/* Return nonzero if OPNUM's MEM should be matched
   in movabs* patterns.  */

int
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  gcc_assert (GET_CODE (mem) == MEM);
  return (volatile_ok || !MEM_VOLATILE_P (mem));
}

/* Initialize the table of extra 80387 mathematical constants.  */

static void
init_ext_80387_constants (void)
{
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
                    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}

/* Return true if the constant is something that can be loaded with
   a special instruction.  */

int
standard_80387_constant_p (rtx x)
{
  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
    return -1;

  if (x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (x == CONST1_RTX (GET_MODE (x)))
    return 2;

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (GET_MODE (x) == XFmode
      && (optimize_size || x86_ext_80387_constants & TUNEMASK))
    {
      REAL_VALUE_TYPE r;
      int i;

      if (! ext_80387_constants_init)
        init_ext_80387_constants ();

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      for (i = 0; i < 5; i++)
        if (real_identical (&r, &ext_80387_constants_table[i]))
          return i + 3;
    }

  return 0;
}

/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
    {
    case 1:
      return "fldz";
    case 2:
      return "fld1";
    case 3:
      return "fldlg2";
    case 4:
      return "fldln2";
    case 5:
      return "fldl2e";
    case 6:
      return "fldl2t";
    case 7:
      return "fldpi";
    default:
      gcc_unreachable ();
    }
}

/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.  */

rtx
standard_80387_constant_rtx (int idx)
{
  int i;

  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  switch (idx)
    {
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
      i = idx - 3;
      break;

    default:
      gcc_unreachable ();
    }

  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
                                       XFmode);
}

/* Return 1 if mode is a valid mode for SSE.  */
static int
standard_sse_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      return 1;

    default:
      return 0;
    }
}

/* Return 1 if X is an FP constant we can load to an SSE register
   without using memory.  */
int
standard_sse_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);

  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (vector_all_ones_operand (x, mode)
      && standard_sse_mode_p (mode))
    return TARGET_SSE2 ? 2 : -1;

  return 0;
}

/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_sse_constant_opcode (rtx insn, rtx x)
{
  switch (standard_sse_constant_p (x))
    {
    case 1:
      if (get_attr_mode (insn) == MODE_V4SF)
        return "xorps\t%0, %0";
      else if (get_attr_mode (insn) == MODE_V2DF)
        return "xorpd\t%0, %0";
      else
        return "pxor\t%0, %0";
    case 2:
      return "pcmpeqd\t%0, %0";
    }
  gcc_unreachable ();
}

/* Returns 1 if OP contains a symbol reference.  */

int
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          int j;

          for (j = XVECLEN (op, i) - 1; j >= 0; j--)
            if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
              return 1;
        }

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
        return 1;
    }

  return 0;
}

/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.  */

int
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow popping more than 32768 bytes of arguments, since
     that's all we can do with one `ret' instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  ix86_compute_frame_layout (&frame);
  return frame.to_allocate == 0 && frame.nregs == 0;
}

/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

int
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return 1;

  /* Several x86 OSes need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return 1;

  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
     the frame pointer by default.  Turn it back on now if we've not
     got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!current_function_is_leaf
          || ix86_current_function_calls_tls_descriptor))
    return 1;

  if (current_function_profile)
    return 1;

  return 0;
}

/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}

#if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

static int pic_labels_used;

/* Fills in the label name that should be used for a pc thunk for
   the given register.  */

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  gcc_assert (!TARGET_64BIT);

  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}


/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */
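/* For example, the thunk emitted below for %ebx is simply

       __i686.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret

   so calling it leaves the address of the insn following the call,
   i.e. the PC, in %ebx.  */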

void
ix86_file_end (void)
{
  rtx xops[2];
  int regno;

  for (regno = 0; regno < 8; ++regno)
    {
      char name[32];

      if (! ((pic_labels_used >> regno) & 1))
        continue;

      get_pc_thunk_name (name, regno);

#if TARGET_MACHO
      if (TARGET_MACHO)
        {
          switch_to_section (darwin_sections[text_coal_section]);
          fputs ("\t.weak_definition\t", asm_out_file);
          assemble_name (asm_out_file, name);
          fputs ("\n\t.private_extern\t", asm_out_file);
          assemble_name (asm_out_file, name);
          fputs ("\n", asm_out_file);
          ASM_OUTPUT_LABEL (asm_out_file, name);
        }
      else
#endif
      if (USE_HIDDEN_LINKONCE)
        {
          tree decl;

          decl = build_decl (FUNCTION_DECL, get_identifier (name),
                             error_mark_node);
          TREE_PUBLIC (decl) = 1;
          TREE_STATIC (decl) = 1;
          DECL_ONE_ONLY (decl) = 1;

          (*targetm.asm_out.unique_section) (decl, 0);
          switch_to_section (get_named_section (decl, NULL, 0));

          (*targetm.asm_out.globalize_label) (asm_out_file, name);
          fputs ("\t.hidden\t", asm_out_file);
          assemble_name (asm_out_file, name);
          fputc ('\n', asm_out_file);
          ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
        }
      else
        {
          switch_to_section (text_section);
          ASM_OUTPUT_LABEL (asm_out_file, name);
        }

      xops[0] = gen_rtx_REG (SImode, regno);
      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
      output_asm_insn ("ret", xops);
    }

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();
}

/* Emit code for the SET_GOT patterns.  */

const char *
output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
{
  rtx xops[3];

  xops[0] = dest;
  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
    {
      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());

      if (!flag_pic)
        output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      else
        output_asm_insn ("call\t%a2", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
         is what will be referenced by the Mach-O PIC subsystem.  */
      if (!label)
        ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
#endif

      (*targetm.asm_out.internal_label) (asm_out_file, "L",
                                 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));

      if (flag_pic)
        output_asm_insn ("pop{l}\t%0", xops);
    }
  else
    {
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
         is what will be referenced by the Mach-O PIC subsystem.  */
#if TARGET_MACHO
      if (!label)
        ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
      else
        targetm.asm_out.internal_label (asm_out_file, "L",
                                           CODE_LABEL_NUMBER (label));
#endif
    }

  if (TARGET_MACHO)
    return "";

  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
  else
    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);

  return "";
}

/* Generate a "push" pattern for input ARG.  */
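/* The RTL built here is (set (mem (pre_dec sp)) arg), which is exactly
   what a "push" instruction does.  */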

static rtx
gen_push (rtx arg)
{
  return gen_rtx_SET (VOIDmode,
                      gen_rtx_MEM (Pmode,
                                   gen_rtx_PRE_DEC (Pmode,
                                                    stack_pointer_rtx)),
                      arg);
}

/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

static unsigned int
ix86_select_alt_pic_regnum (void)
{
  if (current_function_is_leaf && !current_function_profile
      && !ix86_current_function_calls_tls_descriptor)
    {
      int i;
      for (i = 2; i >= 0; --i)
        if (!regs_ever_live[i])
          return i;
    }

  return INVALID_REGNUM;
}

/* Return 1 if we need to save REGNO.  */
static int
ix86_save_reg (unsigned int regno, int maybe_eh_return)
{
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
          || current_function_profile
          || current_function_calls_eh_return
          || current_function_uses_const_pool))
    {
      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
        return 0;
      return 1;
    }

  if (current_function_calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
        {
          unsigned test = EH_RETURN_DATA_REGNO (i);
          if (test == INVALID_REGNUM)
            break;
          if (test == regno)
            return 1;
        }
    }

  if (cfun->machine->force_align_arg_pointer
      && regno == REGNO (cfun->machine->force_align_arg_pointer))
    return 1;

  return (regs_ever_live[regno]
          && !call_used_regs[regno]
          && !fixed_regs[regno]
          && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}

/* Return number of registers to be saved on the stack.  */

static int
ix86_nsaved_regs (void)
{
  int nregs = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      nregs++;
  return nregs;
}

/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
           && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      gcc_assert (to == STACK_POINTER_REGNUM);

      if (from == ARG_POINTER_REGNUM)
        return frame.stack_pointer_offset;

      gcc_assert (from == FRAME_POINTER_REGNUM);
      return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}

/* Fill the ix86_frame structure describing the frame of the function
   currently being compiled.  */
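/* The resulting layout, from higher to lower addresses:

       return address
       saved frame pointer, if used   <- hard_frame_pointer_offset
       saved registers
       va-arg register save area
       padding1
       local variables                <- frame_pointer_offset
       outgoing arguments
       padding2                       <- stack_pointer_offset  */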
5074
 
5075
static void
5076
ix86_compute_frame_layout (struct ix86_frame *frame)
5077
{
5078
  HOST_WIDE_INT total_size;
5079
  unsigned int stack_alignment_needed;
5080
  HOST_WIDE_INT offset;
5081
  unsigned int preferred_alignment;
5082
  HOST_WIDE_INT size = get_frame_size ();
5083
 
5084
  frame->nregs = ix86_nsaved_regs ();
5085
  total_size = size;
5086
 
5087
  stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5088
  preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5089
 
5090
  /* During reload iteration the amount of registers saved can change.
5091
     Recompute the value as needed.  Do not recompute when amount of registers
5092
     didn't change as reload does multiple calls to the function and does not
5093
     expect the decision to change within single iteration.  */
5094
  if (!optimize_size
5095
      && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5096
    {
5097
      int count = frame->nregs;
5098
 
5099
      cfun->machine->use_fast_prologue_epilogue_nregs = count;
5100
      /* The fast prologue uses move instead of push to save registers.  This
5101
         is significantly longer, but also executes faster as modern hardware
5102
         can execute the moves in parallel, but can't do that for push/pop.
5103
 
5104
         Be careful about choosing what prologue to emit:  When function takes
5105
         many instructions to execute we may use slow version as well as in
5106
         case function is known to be outside hot spot (this is known with
5107
         feedback only).  Weight the size of function by number of registers
5108
         to save as it is cheap to use one or two push instructions but very
5109
         slow to use many of them.  */
5110
      if (count)
5111
        count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5112
      if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5113
          || (flag_branch_probabilities
5114
              && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5115
        cfun->machine->use_fast_prologue_epilogue = false;
5116
      else
5117
        cfun->machine->use_fast_prologue_epilogue
5118
           = !expensive_function_p (count);
5119
    }
5120
  if (TARGET_PROLOGUE_USING_MOVE
5121
      && cfun->machine->use_fast_prologue_epilogue)
5122
    frame->save_regs_using_mov = true;
5123
  else
5124
    frame->save_regs_using_mov = false;
5125
 
5126
 
5127
  /* Skip return address and saved base pointer.  */
5128
  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5129
 
5130
  frame->hard_frame_pointer_offset = offset;
5131
 
5132
  /* Do some sanity checking of stack_alignment_needed and
5133
     preferred_alignment, since i386 port is the only using those features
5134
     that may break easily.  */
5135
 
5136
  gcc_assert (!size || stack_alignment_needed);
5137
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5138
  gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5139
  gcc_assert (stack_alignment_needed
5140
              <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5141
 
5142
  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5143
    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5144
 
5145
  /* Register save area */
5146
  offset += frame->nregs * UNITS_PER_WORD;
5147
 
5148
  /* Va-arg area */
5149
  if (ix86_save_varrargs_registers)
5150
    {
5151
      offset += X86_64_VARARGS_SIZE;
5152
      frame->va_arg_size = X86_64_VARARGS_SIZE;
5153
    }
5154
  else
5155
    frame->va_arg_size = 0;
5156
 
5157
  /* Align start of frame for local function.  */
5158
  frame->padding1 = ((offset + stack_alignment_needed - 1)
5159
                     & -stack_alignment_needed) - offset;
5160
 
5161
  offset += frame->padding1;
5162
 
5163
  /* Frame pointer points here.  */
5164
  frame->frame_pointer_offset = offset;
5165
 
5166
  offset += size;
5167
 
5168
  /* Add outgoing arguments area.  Can be skipped if we eliminated
5169
     all the function calls as dead code.
5170
     Skipping is however impossible when function calls alloca.  Alloca
5171
     expander assumes that last current_function_outgoing_args_size
5172
     of stack frame are unused.  */
5173
  if (ACCUMULATE_OUTGOING_ARGS
5174
      && (!current_function_is_leaf || current_function_calls_alloca
5175
          || ix86_current_function_calls_tls_descriptor))
5176
    {
5177
      offset += current_function_outgoing_args_size;
5178
      frame->outgoing_arguments_size = current_function_outgoing_args_size;
5179
    }
5180
  else
5181
    frame->outgoing_arguments_size = 0;
5182
 
5183
  /* Align stack boundary.  Only needed if we're calling another function
5184
     or using alloca.  */
5185
  if (!current_function_is_leaf || current_function_calls_alloca
5186
      || ix86_current_function_calls_tls_descriptor)
5187
    frame->padding2 = ((offset + preferred_alignment - 1)
5188
                       & -preferred_alignment) - offset;
5189
  else
5190
    frame->padding2 = 0;
5191
 
5192
  offset += frame->padding2;
5193
 
5194
  /* We've reached end of stack frame.  */
5195
  frame->stack_pointer_offset = offset;
5196
 
5197
  /* Size prologue needs to allocate.  */
5198
  frame->to_allocate =
5199
    (size + frame->padding1 + frame->padding2
5200
     + frame->outgoing_arguments_size + frame->va_arg_size);
5201
 
5202
  if ((!frame->to_allocate && frame->nregs <= 1)
5203
      || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5204
    frame->save_regs_using_mov = false;
5205
 
5206
  if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5207
      && current_function_is_leaf
5208
      && !ix86_current_function_calls_tls_descriptor)
5209
    {
5210
      frame->red_zone_size = frame->to_allocate;
5211
      if (frame->save_regs_using_mov)
5212
        frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5213
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5214
        frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5215
    }
5216
  else
5217
    frame->red_zone_size = 0;
5218
  frame->to_allocate -= frame->red_zone_size;
5219
  frame->stack_pointer_offset -= frame->red_zone_size;
5220
#if 0
5221
  fprintf (stderr, "nregs: %i\n", frame->nregs);
5222
  fprintf (stderr, "size: %i\n", size);
5223
  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5224
  fprintf (stderr, "padding1: %i\n", frame->padding1);
5225
  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5226
  fprintf (stderr, "padding2: %i\n", frame->padding2);
5227
  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5228
  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5229
  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5230
  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5231
           frame->hard_frame_pointer_offset);
5232
  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5233
#endif
5234
}
5235
 
5236
/* Emit code to save registers in the prologue.  */
5237
 
5238
static void
5239
ix86_emit_save_regs (void)
5240
{
5241
  unsigned int regno;
5242
  rtx insn;
5243
 
5244
  for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5245
    if (ix86_save_reg (regno, true))
5246
      {
5247
        insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5248
        RTX_FRAME_RELATED_P (insn) = 1;
5249
      }
5250
}
5251
 
5252
/* Emit code to save registers using MOV insns.  First register
5253
   is restored from POINTER + OFFSET.  */
5254
static void
5255
ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5256
{
5257
  unsigned int regno;
5258
  rtx insn;
5259
 
5260
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5261
    if (ix86_save_reg (regno, true))
5262
      {
5263
        insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5264
                                               Pmode, offset),
5265
                               gen_rtx_REG (Pmode, regno));
5266
        RTX_FRAME_RELATED_P (insn) = 1;
5267
        offset += UNITS_PER_WORD;
5268
      }
5269
}

/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if the %r11 register is live and cannot be freely used, and positive
   otherwise.  */

static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
{
  rtx insn;

  if (! TARGET_64BIT)
    insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
  else if (x86_64_immediate_operand (offset, DImode))
    insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
  else
    {
      rtx r11;
      /* r11 is used by indirect sibcall return as well, set before the
         epilogue and used after the epilogue.  ATM indirect sibcall
         shouldn't be used together with huge frame sizes in one
         function because of the frame_size check in sibcall.c.  */
      gcc_assert (style);
      r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
      insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
      if (style < 0)
        RTX_FRAME_RELATED_P (insn) = 1;
      insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
                                                               offset));
    }
  if (style < 0)
    RTX_FRAME_RELATED_P (insn) = 1;
}
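
/* Editorial note: an illustrative sketch, not part of GCC.  The
   x86_64_immediate_operand test above decides whether the adjustment
   fits in a sign-extended 32-bit immediate; if not, the constant must
   be staged through %r11 first.  The check amounts to:  */
#if 0
#include <assert.h>
#include <stdint.h>

static int
fits_simm32 (int64_t v)
{
  return v == (int64_t) (int32_t) v;
}

int
main (void)
{
  assert (fits_simm32 (-4096));              /* ordinary frame sizes fit */
  assert (!fits_simm32 (1LL << 32));         /* huge frames need %r11 */
  return 0;
}
#endif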

/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */

static rtx
ix86_internal_arg_pointer (void)
{
  bool has_force_align_arg_pointer =
    (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
                            TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
  if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
       && DECL_NAME (current_function_decl)
       && MAIN_NAME_P (DECL_NAME (current_function_decl))
       && DECL_FILE_SCOPE_P (current_function_decl))
      || ix86_force_align_arg_pointer
      || has_force_align_arg_pointer)
    {
      /* Nested functions can't realign the stack due to a register
         conflict.  */
      if (DECL_CONTEXT (current_function_decl)
          && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
        {
          if (ix86_force_align_arg_pointer)
            warning (0, "-mstackrealign ignored for nested functions");
          if (has_force_align_arg_pointer)
            error ("%s not supported for nested functions",
                   ix86_force_align_arg_pointer_string);
          return virtual_incoming_args_rtx;
        }
      cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
      return copy_to_reg (cfun->machine->force_align_arg_pointer);
    }
  else
    return virtual_incoming_args_rtx;
}

/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
   This is called from dwarf2out.c to emit call frame instructions
   for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs.  */
static void
ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
{
  rtx unspec = SET_SRC (pattern);
  gcc_assert (GET_CODE (unspec) == UNSPEC);

  switch (index)
    {
    case UNSPEC_REG_SAVE:
      dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
                              SET_DEST (pattern));
      break;
    case UNSPEC_DEF_CFA:
      dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
                         INTVAL (XVECEXP (unspec, 0, 0)));
      break;
    default:
      gcc_unreachable ();
    }
}

/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue (void)
{
  rtx insn;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;

  ix86_compute_frame_layout (&frame);

  if (cfun->machine->force_align_arg_pointer)
    {
      rtx x, y;

      /* Grab the argument pointer.  */
      x = plus_constant (stack_pointer_rtx, 4);
      y = cfun->machine->force_align_arg_pointer;
      insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* The unwind info consists of two parts: install the fafp as the cfa,
         and record the fafp as the "save register" of the stack pointer.
         The latter is there so that the unwinder can see where it
         should restore the stack pointer across the and insn.  */
      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
      x = gen_rtx_SET (VOIDmode, y, x);
      RTX_FRAME_RELATED_P (x) = 1;
      y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
                          UNSPEC_REG_SAVE);
      y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
      RTX_FRAME_RELATED_P (y) = 1;
      x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
      REG_NOTES (insn) = x;

      /* Align the stack.  */
      emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
                             GEN_INT (-16)));

      /* And here we cheat like madmen with the unwind info.  We force the
         cfa register back to sp+4, which is exactly what it was at the
         start of the function.  Re-pushing the return address results in
         the return at the same spot relative to the cfa, and thus is
         correct wrt the unwind info.  */
      x = cfun->machine->force_align_arg_pointer;
      x = gen_frame_mem (Pmode, plus_constant (x, -4));
      insn = emit_insn (gen_push (x));
      RTX_FRAME_RELATED_P (insn) = 1;

      x = GEN_INT (4);
      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
      REG_NOTES (insn) = x;
    }

  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
     slower on all targets.  Also sdb doesn't like it.  */

  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  allocate = frame.to_allocate;

  if (!frame.save_regs_using_mov)
    ix86_emit_save_regs ();
  else
    allocate += frame.nregs * UNITS_PER_WORD;

  /* When using the red zone we may start register saving before allocating
     the stack frame, saving one cycle of the prologue.  */
  if (TARGET_RED_ZONE && frame.save_regs_using_mov)
    ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
                                   : stack_pointer_rtx,
                                   -frame.nregs * UNITS_PER_WORD);

  if (allocate == 0)
    ;
  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                               GEN_INT (-allocate), -1);
  else
    {
      /* Only valid for Win32.  */
      rtx eax = gen_rtx_REG (SImode, 0);
      bool eax_live = ix86_eax_live_at_start_p ();
      rtx t;

      gcc_assert (!TARGET_64BIT);

      if (eax_live)
        {
          emit_insn (gen_push (eax));
          allocate -= 4;
        }

      emit_move_insn (eax, GEN_INT (allocate));

      insn = emit_insn (gen_allocate_stack_worker (eax));
      RTX_FRAME_RELATED_P (insn) = 1;
      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
                                            t, REG_NOTES (insn));

      if (eax_live)
        {
          if (frame_pointer_needed)
            t = plus_constant (hard_frame_pointer_rtx,
                               allocate
                               - frame.to_allocate
                               - frame.nregs * UNITS_PER_WORD);
          else
            t = plus_constant (stack_pointer_rtx, allocate);
          emit_move_insn (eax, gen_rtx_MEM (SImode, t));
        }
    }

  if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
    {
      if (!frame_pointer_needed || !frame.to_allocate)
        ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
      else
        ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
                                       -frame.nregs * UNITS_PER_WORD);
    }

  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
          || current_function_profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
        REGNO (pic_offset_table_rtx) = alt_pic_reg_used;

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      if (TARGET_64BIT)
        insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
      else
        insn = emit_insn (gen_set_got (pic_offset_table_rtx));

      /* Even with accurate pre-reload life analysis, we can wind up
         deleting all references to the pic register after reload.
         Consider if cross-jumping unifies two sides of a branch
         controlled by a comparison vs the only read from a global.
         In which case, allow the set_got to be deleted, though we're
         too late to do anything about the ebx save in the prologue.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
    }

  /* Prevent function calls from being scheduled before the call to mcount.
     In the pic_reg_used case, make sure that the got load isn't deleted.  */
  if (current_function_profile)
    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
}
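
/* Editorial note: an illustrative sketch, not part of GCC.  On the
   Win32 path above, frames of CHECK_STACK_LIMIT bytes or more cannot
   simply be subtracted from %esp: the OS grows the stack one guard
   page at a time, so the helper reached via %eax touches each page in
   order.  The loop below mimics that probing pattern on a heap buffer
   (the real helper probes the stack itself; PAGE_SIZE here is an
   assumption for the demo):  */
#if 0
#include <stdlib.h>

#define PAGE_SIZE 4096

int
main (void)
{
  size_t allocate = 10 * PAGE_SIZE;
  volatile char *buf = malloc (allocate);
  size_t off;

  /* Touch one byte per page, top to bottom, like the probe helper.  */
  for (off = 0; off < allocate; off += PAGE_SIZE)
    buf[allocate - 1 - off] = 0;

  free ((void *) buf);
  return 0;
}
#endif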

/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  */
static void
ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
                                  int maybe_eh_return)
{
  int regno;
  rtx base_address = gen_rtx_MEM (Pmode, pointer);

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, maybe_eh_return))
      {
        /* Ensure that adjust_address won't be forced to produce a pointer
           out of the range allowed by the x86-64 instruction set.  */
        if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
          {
            rtx r11;

            r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
            emit_move_insn (r11, GEN_INT (offset));
            emit_insn (gen_adddi3 (r11, r11, pointer));
            base_address = gen_rtx_MEM (Pmode, r11);
            offset = 0;
          }
        emit_move_insn (gen_rtx_REG (Pmode, regno),
                        adjust_address (base_address, Pmode, offset));
        offset += UNITS_PER_WORD;
      }
}

/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (int style)
{
  int regno;
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  struct ix86_frame frame;
  HOST_WIDE_INT offset;

  ix86_compute_frame_layout (&frame);

  /* Calculate start of saved registers relative to ebp.  Special care
     must be taken for the normal return case of a function using
     eh_return: the eax and edx registers are marked as saved, but not
     restored along this path.  */
  offset = frame.nregs;
  if (current_function_calls_eh_return && style != 2)
    offset -= 2;
  offset *= -UNITS_PER_WORD;

  /* If we're only restoring one register and sp is not valid, then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.

     The default code results in a stack adjustment using an add/lea
     instruction, while this code results in a LEAVE instruction (or
     discrete equivalent), so it is profitable in some other cases as
     well, especially when there are no registers to restore.  We also
     use this code when TARGET_USE_LEAVE and there is exactly one
     register to pop.  This heuristic may need some tuning in the
     future.  */
  if ((!sp_valid && frame.nregs <= 1)
      || (TARGET_EPILOGUE_USING_MOVE
          && cfun->machine->use_fast_prologue_epilogue
          && (frame.nregs > 1 || frame.to_allocate))
      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
      || (frame_pointer_needed && TARGET_USE_LEAVE
          && cfun->machine->use_fast_prologue_epilogue
          && frame.nregs == 1)
      || current_function_calls_eh_return)
    {
      /* Restore registers.  We can use ebp or esp to address the memory
         locations.  If both are available, default to ebp, since offsets
         are known to be small.  The only exception is esp pointing directly
         to the end of the block of saved registers, where we may simplify
         the addressing mode.  */

      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
        ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
                                          frame.to_allocate, style == 2);
      else
        ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
                                          offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
        {
          rtx tmp, sa = EH_RETURN_STACKADJ_RTX;

          if (frame_pointer_needed)
            {
              tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
              tmp = plus_constant (tmp, UNITS_PER_WORD);
              emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));

              tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
              emit_move_insn (hard_frame_pointer_rtx, tmp);

              pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
                                         const0_rtx, style);
            }
          else
            {
              tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
              tmp = plus_constant (tmp, (frame.to_allocate
                                         + frame.nregs * UNITS_PER_WORD));
              emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
            }
        }
      else if (!frame_pointer_needed)
        pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                   GEN_INT (frame.to_allocate
                                            + frame.nregs * UNITS_PER_WORD),
                                   style);
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size
               || !cfun->machine->use_fast_prologue_epilogue)
        emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
      else
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx,
                                     hard_frame_pointer_rtx,
                                     const0_rtx, style);
          if (TARGET_64BIT)
            emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
          else
            emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
        }
    }
  else
    {
      /* The first step is to deallocate the stack frame so that we can
         pop the registers.  */
      if (!sp_valid)
        {
          gcc_assert (frame_pointer_needed);
          pro_epilogue_adjust_stack (stack_pointer_rtx,
                                     hard_frame_pointer_rtx,
                                     GEN_INT (offset), style);
        }
      else if (frame.to_allocate)
        pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                   GEN_INT (frame.to_allocate), style);

      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
        if (ix86_save_reg (regno, false))
          {
            if (TARGET_64BIT)
              emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
            else
              emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
          }
      if (frame_pointer_needed)
        {
          /* Leave results in shorter dependency chains on CPUs that are
             able to grok it fast.  */
          if (TARGET_USE_LEAVE)
            emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
          else if (TARGET_64BIT)
            emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
          else
            emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
        }
    }

  if (cfun->machine->force_align_arg_pointer)
    {
      emit_insn (gen_addsi3 (stack_pointer_rtx,
                             cfun->machine->force_align_arg_pointer,
                             GEN_INT (-4)));
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop the
         return address, do an explicit add, and jump indirectly to the
         caller.  */

      if (current_function_pops_args >= 65536)
        {
          rtx ecx = gen_rtx_REG (SImode, 2);

          /* There is no "pascal" calling convention in the 64-bit ABI.  */
          gcc_assert (!TARGET_64BIT);

          emit_insn (gen_popsi1 (ecx));
          emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
          emit_jump_insn (gen_return_indirect_internal (ecx));
        }
      else
        emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
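
/* Editorial note: an illustrative sketch, not part of GCC.  The 64K
   limit above comes from the encoding of "ret imm16": the pop count
   is an unsigned 16-bit immediate, so anything larger has to be
   emulated with pop/add/indirect-jump.  The decision reduces to:  */
#if 0
#include <assert.h>

static int
needs_indirect_return (unsigned long pops_args)
{
  return pops_args >= 65536;    /* does not fit the imm16 of ret */
}

int
main (void)
{
  assert (!needs_indirect_return (8));        /* ret $8 */
  assert (needs_indirect_return (65536));     /* pop %ecx; add; jmp *%ecx */
  return 0;
}
#endif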

/* Reset from the function's potential modifications.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
                               HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  if (pic_offset_table_rtx)
    REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
#if TARGET_MACHO
  /* Mach-O doesn't support labels at the end of objects, so if
     it looks like we might want one, insert a NOP.  */
  {
    rtx insn = get_last_insn ();
    while (insn
           && NOTE_P (insn)
           && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
      insn = PREV_INSN (insn);
    if (insn
        && (LABEL_P (insn)
            || (NOTE_P (insn)
                && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
      fputs ("\tnop\n", file);
  }
#endif

}

/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but is still used for computing the length of the lea
   instruction.  */

int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
        {
          if (n >= 4)
            return 0;
          addends[n++] = XEXP (op, 1);
          op = XEXP (op, 0);
        }
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
        return 0;
      addends[n] = op;

      for (i = n; i >= 0; --i)
        {
          op = addends[i];
          switch (GET_CODE (op))
            {
            case MULT:
              if (index)
                return 0;
              index = XEXP (op, 0);
              scale_rtx = XEXP (op, 1);
              break;

            case UNSPEC:
              if (XINT (op, 1) == UNSPEC_TP
                  && TARGET_TLS_DIRECT_SEG_REFS
                  && seg == SEG_DEFAULT)
                seg = TARGET_64BIT ? SEG_FS : SEG_GS;
              else
                return 0;
              break;

            case REG:
            case SUBREG:
              if (!base)
                base = op;
              else if (!index)
                index = op;
              else
                return 0;
              break;

            case CONST:
            case CONST_INT:
            case SYMBOL_REF:
            case LABEL_REF:
              if (disp)
                return 0;
              disp = op;
              break;

            default:
              return 0;
            }
        }
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);            /* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
        return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
        return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;                        /* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
        return 0;
      scale = INTVAL (scale_rtx);
    }

  base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
  index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;

  /* Allow the arg pointer and stack pointer as an index if there is no
     scaling.  */
  if (base_reg && index_reg && scale == 1
      && (index_reg == arg_pointer_rtx
          || index_reg == frame_pointer_rtx
          || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
    {
      rtx tmp;
      tmp = base, base = index, index = tmp;
      tmp = base_reg, base_reg = index_reg, index_reg = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base_reg == hard_frame_pointer_rtx
       || base_reg == frame_pointer_rtx
       || base_reg == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] forces the instruction to be vector
     decoded.  Avoid this by transforming to [%esi+0].  */
  if (ix86_tune == PROCESSOR_K6 && !optimize_size
      && base_reg && !index_reg && !disp
      && REG_P (base_reg)
      && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, base_reg = index_reg, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}
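
/* Editorial note: an illustrative sketch, not part of GCC.  The PLUS
   case above flattens a left-leaning sum tree (((a + b) + c) + d) into
   at most four addends by repeatedly peeling off XEXP (op, 1) and
   descending into XEXP (op, 0).  The same walk on a toy tree type:  */
#if 0
#include <assert.h>

struct node { struct node *l, *r; int leaf; };

/* Collect the leaves of a left-leaning sum tree into ADDENDS,
   newest-peeled first; return the count, or -1 on overflow.  */
static int
flatten (struct node *op, struct node *addends[4])
{
  int n = 0;
  while (op->l)                 /* interior nodes model PLUS */
    {
      if (n >= 4)
        return -1;
      addends[n++] = op->r;
      op = op->l;
    }
  if (n >= 4)
    return -1;
  addends[n] = op;
  return n + 1;
}

int
main (void)
{
  struct node a = { 0, 0, 1 }, b = { 0, 0, 2 }, c = { 0, 0, 3 };
  struct node ab = { &a, &b, 0 }, abc = { &ab, &c, 0 };
  struct node *parts[4];
  assert (flatten (&abc, parts) == 3);
  assert (parts[0] == &c && parts[1] == &b && parts[2] == &a);
  return 0;
}
#endif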

/* Return the cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
static int
ix86_address_cost (rtx x)
{
  struct ix86_address parts;
  int cost = 1;
  int ok = ix86_decompose_address (x, &parts);

  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* More complex memory references are better.  */
  if (parts.disp && parts.disp != const0_rtx)
    cost--;
  if (parts.seg != SEG_DEFAULT)
    cost--;

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
          && (!REG_P (parts.index)
              || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of such instructions
     and decoding degenerates to the vector decoder.  Increase the cost of
     such addresses here.  The penalty is at least 2 cycles.  It may be
     worthwhile to split such addresses or even to refuse them at all.

     The following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last cases may be avoidable by explicitly coding the
     zero into the memory address, but I don't have an AMD-K6 machine
     handy to check this theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
          || (parts.disp && !parts.base && parts.index && parts.scale != 1)
          || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
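
/* Editorial note: an illustrative sketch, not part of GCC.  Stripped
   of the RTL plumbing, the heuristic above starts at 1, rewards a
   displacement or segment (folding work into the address is free),
   and penalizes non-hard registers -- here simplified to "pseudo" --
   extra hard when there are two distinct ones.  A toy scalar version
   under those assumptions:  */
#if 0
#include <assert.h>

static int
addr_cost (int has_disp, int has_seg, int base_is_pseudo,
           int index_is_pseudo, int distinct)
{
  int cost = 1;
  if (has_disp)
    cost--;
  if (has_seg)
    cost--;
  if (base_is_pseudo || index_is_pseudo)
    cost++;
  if (base_is_pseudo && index_is_pseudo && distinct)
    cost++;
  return cost;
}

int
main (void)
{
  assert (addr_cost (1, 0, 0, 0, 0) == 0);  /* disp(%ebp): cheap */
  assert (addr_cost (0, 0, 1, 1, 1) == 3);  /* two live pseudos: dear */
  return 0;
}
#endif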

/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (rtx x)
{
  rtx term;

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST)
        return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
          && (GET_CODE (XEXP (term, 1)) == CONST_INT
              || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
        term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
          || XINT (term, 1) != UNSPEC_GOTPCREL)
        return x;

      term = XVECEXP (term, 0, 0);

      if (GET_CODE (term) != SYMBOL_REF
          && GET_CODE (term) != LABEL_REF)
        return x;

      return term;
    }

  term = ix86_delegitimize_address (x);

  if (GET_CODE (term) != SYMBOL_REF
      && GET_CODE (term) != LABEL_REF)
    return x;

  return term;
}

/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   use.  */

static bool
darwin_local_data_pic (rtx disp)
{
  if (GET_CODE (disp) == MINUS)
    {
      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
          || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
        if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
          {
            const char *sym_name = XSTR (XEXP (disp, 1), 0);
            if (! strcmp (sym_name, "<pic base>"))
              return true;
          }
    }

  return false;
}

/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

bool
legitimate_constant_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      x = XEXP (x, 0);

      if (GET_CODE (x) == PLUS)
        {
          if (GET_CODE (XEXP (x, 1)) != CONST_INT)
            return false;
          x = XEXP (x, 0);
        }

      if (TARGET_MACHO && darwin_local_data_pic (x))
        return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
        switch (XINT (x, 1))
          {
          case UNSPEC_GOTOFF:
            return TARGET_64BIT;
          case UNSPEC_TPOFF:
          case UNSPEC_NTPOFF:
            x = XVECEXP (x, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
          case UNSPEC_DTPOFF:
            x = XVECEXP (x, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
          default:
            return false;
          }

      /* We must have drilled down to a symbol.  */
      if (GET_CODE (x) == LABEL_REF)
        return true;
      if (GET_CODE (x) != SYMBOL_REF)
        return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))
        return false;
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == TImode
          && x != CONST0_RTX (TImode)
          && !TARGET_64BIT)
        return false;
      break;

    case CONST_VECTOR:
      if (x == CONST0_RTX (GET_MODE (x)))
        return true;
      return false;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}

/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (rtx x)
{
  /* We can always put integral constants and vectors in memory.  */
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_VECTOR:
      return false;

    default:
      break;
    }
  return !legitimate_constant_p (x);
}

/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
}

/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
          && GET_CODE (XEXP (inner, 1)) == CONST_INT)
        inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
        switch (XINT (inner, 1))
          {
          case UNSPEC_GOTOFF:
            return TARGET_64BIT;
          case UNSPEC_TPOFF:
            x = XVECEXP (inner, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
          default:
            return false;
          }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}

/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

int
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx op0 = disp, op1;

      switch (GET_CODE (disp))
        {
        case LABEL_REF:
          return true;

        case CONST:
          if (GET_CODE (XEXP (disp, 0)) != PLUS)
            break;
          op0 = XEXP (XEXP (disp, 0), 0);
          op1 = XEXP (XEXP (disp, 0), 1);
          if (GET_CODE (op1) != CONST_INT
              || INTVAL (op1) >= 16*1024*1024
              || INTVAL (op1) < -16*1024*1024)
            break;
          if (GET_CODE (op0) == LABEL_REF)
            return true;
          if (GET_CODE (op0) != SYMBOL_REF)
            break;
          /* FALLTHRU */

        case SYMBOL_REF:
          /* TLS references should always be enclosed in UNSPEC.  */
          if (SYMBOL_REF_TLS_MODEL (op0))
            return false;
          if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
            return true;
          break;

        default:
          break;
        }
    }
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions.  This limits the allowed
         distance of GOT tables.  We should not need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
          || (XINT (disp, 1) != UNSPEC_GOTPCREL
              && XINT (disp, 1) != UNSPEC_GOTOFF))
        return 0;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
          && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
        return 0;
      return 1;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
        return 0;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return 1;

  if (GET_CODE (disp) != UNSPEC)
    return 0;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
        return false;
      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
         While the ABI also specifies a 32bit relocation, we don't produce
         it in the small PIC model at all.  */
      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
           || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
          && !TARGET_64BIT)
        return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
        return false;
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
    case UNSPEC_NTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
    case UNSPEC_DTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
    }

  return 0;
}
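
/* Editorial note: an illustrative sketch, not part of GCC.  The CONST
   case above only accepts symbol+offset in 64bit mode when the offset
   lies within a +/-16MB window, so the referenced object stays within
   reach of the 32-bit relocations the small model relies on:  */
#if 0
#include <assert.h>

static int
offset_in_pic_window (long off)
{
  return off >= -16*1024*1024 && off < 16*1024*1024;
}

int
main (void)
{
  assert (offset_in_pic_window (4096));
  assert (!offset_in_pic_window (16*1024*1024));   /* first refused value */
  return 0;
}
#endif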

/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

int
legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
               "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
               GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (ix86_decompose_address (addr, &parts) <= 0)
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREGs that span more than a word here.  It can lead to
     spill failures when the base is one word out of a two-word structure,
     which is represented internally as a DImode int.  */

  if (base)
    {
      rtx reg;
      reason_rtx = base;

      if (REG_P (base))
        reg = base;
      else if (GET_CODE (base) == SUBREG
               && REG_P (SUBREG_REG (base))
               && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
                  <= UNITS_PER_WORD)
        reg = SUBREG_REG (base);
      else
        {
          reason = "base is not a register";
          goto report_error;
        }

      if (GET_MODE (base) != Pmode)
        {
          reason = "base is not in Pmode";
          goto report_error;
        }

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
          || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
        {
          reason = "base is not valid";
          goto report_error;
        }
    }

  /* Validate index register.

     Don't allow SUBREGs that span more than a word here -- same as above.  */

  if (index)
    {
      rtx reg;
      reason_rtx = index;

      if (REG_P (index))
        reg = index;
      else if (GET_CODE (index) == SUBREG
               && REG_P (SUBREG_REG (index))
               && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
                  <= UNITS_PER_WORD)
        reg = SUBREG_REG (index);
      else
        {
          reason = "index is not a register";
          goto report_error;
        }

      if (GET_MODE (index) != Pmode)
        {
          reason = "index is not in Pmode";
          goto report_error;
        }

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
          || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
        {
          reason = "index is not valid";
          goto report_error;
        }
    }

  /* Validate scale factor.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
        {
          reason = "scale without index";
          goto report_error;
        }

      if (scale != 2 && scale != 4 && scale != 8)
        {
          reason = "scale is not a valid multiplier";
          goto report_error;
        }
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (GET_CODE (disp) == CONST
          && GET_CODE (XEXP (disp, 0)) == UNSPEC)
        switch (XINT (XEXP (disp, 0), 1))
          {
          /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
             when used.  While the ABI also specifies 32bit relocations, we
             don't produce them at all and use IP-relative addressing
             instead.  */
          case UNSPEC_GOT:
          case UNSPEC_GOTOFF:
            gcc_assert (flag_pic);
            if (!TARGET_64BIT)
              goto is_legitimate_pic;
            reason = "64bit address unspec";
            goto report_error;

          case UNSPEC_GOTPCREL:
            gcc_assert (flag_pic);
            goto is_legitimate_pic;

          case UNSPEC_GOTTPOFF:
          case UNSPEC_GOTNTPOFF:
          case UNSPEC_INDNTPOFF:
          case UNSPEC_NTPOFF:
          case UNSPEC_DTPOFF:
            break;

          default:
            reason = "invalid address unspec";
            goto report_error;
          }

      else if (SYMBOLIC_CONST (disp)
               && (flag_pic
                   || (TARGET_MACHO
#if TARGET_MACHO
                       && MACHOPIC_INDIRECT
                       && !machopic_operand_p (disp)
#endif
               )))
        {

        is_legitimate_pic:
          if (TARGET_64BIT && (index || base))
            {
              /* foo@dtpoff(%rX) is ok.  */
              if (GET_CODE (disp) != CONST
                  || GET_CODE (XEXP (disp, 0)) != PLUS
                  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
                  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
                  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
                      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
                {
                  reason = "non-constant pic memory reference";
                  goto report_error;
                }
            }
          else if (! legitimate_pic_address_disp_p (disp))
            {
              reason = "displacement is an invalid pic construct";
              goto report_error;
            }

          /* This code used to verify that a symbolic pic displacement
             includes the pic_offset_table_rtx register.

             While this is a good idea, unfortunately these constructs may
             be created by the "adds using lea" optimization for incorrect
             code like:

             int a;
             int foo(int i)
               {
                 return *(&a+i);
               }

             This code is nonsensical, but results in addressing the
             GOT table with a pic_offset_table_rtx base.  We can't
             just refuse it easily, since it gets matched by the
             "addsi3" pattern, which later gets split to lea in the
             case the output register differs from the input.  While
             this could be handled by a separate addsi pattern for this
             case that never results in lea, disabling this test seems
             to be the easier and correct fix for the crash.  */
        }
      else if (GET_CODE (disp) != LABEL_REF
               && GET_CODE (disp) != CONST_INT
               && (GET_CODE (disp) != CONST
                   || !legitimate_constant_p (disp))
               && (GET_CODE (disp) != SYMBOL_REF
                   || !legitimate_constant_p (disp)))
        {
          reason = "displacement is not constant";
          goto report_error;
        }
      else if (TARGET_64BIT
               && !x86_64_immediate_operand (disp, VOIDmode))
        {
          reason = "displacement is out of range";
          goto report_error;
        }
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

 report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
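
/* Editorial note: an illustrative sketch, not part of GCC.  The scale
   validation above reflects the SIB byte: two bits encode the scale,
   so only 1, 2, 4 and 8 are representable, and a scale needs an index
   register to apply to.  A standalone version of the check:  */
#if 0
#include <assert.h>

static int
valid_scale (int scale, int has_index)
{
  if (scale == 1)
    return 1;                         /* no SIB scaling needed */
  if (!has_index)
    return 0;                         /* "scale without index" */
  return scale == 2 || scale == 4 || scale == 8;
}

int
main (void)
{
  assert (valid_scale (4, 1));
  assert (!valid_scale (3, 1));       /* "not a valid multiplier" */
  assert (!valid_scale (2, 0));
  return 0;
}
#endif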

/* Return a unique alias set for the GOT.  */

static HOST_WIDE_INT
ix86_GOT_alias_set (void)
{
  static HOST_WIDE_INT set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}
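
/* Editorial note: an illustrative sketch, not part of GCC.  The
   function above is the classic lazily-initialized static: the alias
   set is allocated on first use and the same handle is returned ever
   after.  The idiom in isolation (not thread-safe, which is fine for
   a single-threaded compiler; next_id stands in for new_alias_set):  */
#if 0
#include <assert.h>

static int next_id = 1;

static int
got_alias_set (void)
{
  static int set = -1;
  if (set == -1)
    set = next_id++;
  return set;
}

int
main (void)
{
  assert (got_alias_set () == got_alias_set ());
  return 0;
}
#endif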

/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

static rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

#if TARGET_MACHO
  if (TARGET_MACHO && !TARGET_64BIT)
    {
      if (reg == 0)
        reg = gen_reg_rtx (Pmode);
      /* Use the generic Mach-O PIC machinery.  */
      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
    }
#endif

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new = addr;
  else if (TARGET_64BIT
           && ix86_cmodel != CM_SMALL_PIC
           && local_symbolic_operand (addr, Pmode))
    {
      rtx tmpreg;
      /* This symbol may be referenced via a displacement from the PIC
         base address (@GOTOFF).  */

      if (reload_in_progress)
        regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
      if (GET_CODE (addr) == CONST)
        addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
        {
          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
                                UNSPEC_GOTOFF);
          new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
        }
      else
        new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new = gen_rtx_CONST (Pmode, new);
      if (!reg)
        tmpreg = gen_reg_rtx (Pmode);
      else
        tmpreg = reg;
      emit_move_insn (tmpreg, new);

      if (reg != 0)
        {
          new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
                                     tmpreg, 1, OPTAB_DIRECT);
          new = reg;
        }
      else
        new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
    }
  else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
         base address (@GOTOFF).  */

      if (reload_in_progress)
        regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
      if (GET_CODE (addr) == CONST)
        addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
        {
          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
                                UNSPEC_GOTOFF);
          new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
        }
      else
        new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new = gen_rtx_CONST (Pmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
        {
          emit_move_insn (reg, new);
          new = reg;
        }
    }
  else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
    {
      if (TARGET_64BIT)
        {
          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
          new = gen_rtx_CONST (Pmode, new);
          new = gen_const_mem (Pmode, new);
          set_mem_alias_set (new, ix86_GOT_alias_set ());

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          /* Use gen_movsi directly; otherwise the address is loaded
             into a register for CSE.  We don't want to CSE these
             addresses; instead we CSE addresses from the GOT table,
             so skip this.  */
          emit_insn (gen_movsi (reg, new));
          new = reg;
        }
      else
        {
          /* This symbol must be referenced via a load from the
             Global Offset Table (@GOT).  */

          if (reload_in_progress)
            regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
          new = gen_rtx_CONST (Pmode, new);
          new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
          new = gen_const_mem (Pmode, new);
          set_mem_alias_set (new, ix86_GOT_alias_set ());

          if (reg == 0)
            reg = gen_reg_rtx (Pmode);
          emit_move_insn (reg, new);
          new = reg;
        }
    }
  else
    {
      if (GET_CODE (addr) == CONST_INT
          && !x86_64_immediate_operand (addr, VOIDmode))
        {
          if (reg)
            {
              emit_move_insn (reg, addr);
              new = reg;
            }
          else
            new = force_reg (Pmode, addr);
        }
      else if (GET_CODE (addr) == CONST)
        {
          addr = XEXP (addr, 0);

          /* We must match stuff we generate before.  Assume the only
             unspecs that can get here are ours.  Not that we could do
             anything with them anyway....  */
          if (GET_CODE (addr) == UNSPEC
              || (GET_CODE (addr) == PLUS
                  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
            return orig;
          gcc_assert (GET_CODE (addr) == PLUS);
        }
      if (GET_CODE (addr) == PLUS)
        {
          rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

          /* Check first to see if this is a constant offset from a @GOTOFF
             symbol reference.  */
          if (local_symbolic_operand (op0, Pmode)
              && GET_CODE (op1) == CONST_INT)
            {
              if (!TARGET_64BIT)
                {
                  if (reload_in_progress)
                    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
                  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
                                        UNSPEC_GOTOFF);
                  new = gen_rtx_PLUS (Pmode, new, op1);
                  new = gen_rtx_CONST (Pmode, new);
                  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

                  if (reg != 0)
                    {
                      emit_move_insn (reg, new);
                      new = reg;
                    }
                }
              else
                {
                  if (INTVAL (op1) < -16*1024*1024
                      || INTVAL (op1) >= 16*1024*1024)
                    {
                      if (!x86_64_immediate_operand (op1, Pmode))
                        op1 = force_reg (Pmode, op1);
                      new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
                    }
                }
            }
          else
            {
              base = legitimize_pic_address (XEXP (addr, 0), reg);
              new  = legitimize_pic_address (XEXP (addr, 1),
                                             base == reg ? NULL_RTX : reg);

              if (GET_CODE (new) == CONST_INT)
                new = plus_constant (base, INTVAL (new));
              else
                {
                  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
                    {
                      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
                      new = XEXP (new, 1);
                    }
                  new = gen_rtx_PLUS (Pmode, base, new);
                }
            }
        }
    }
  return new;
}

/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

static rtx
get_thread_pointer (int to_reg)
{
  rtx tp, reg, insn;

  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
  if (!to_reg)
    return tp;

  reg = gen_reg_rtx (Pmode);
  insn = gen_rtx_SET (VOIDmode, reg, tp);
  insn = emit_insn (insn);

  return reg;
}

/* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.  */

static rtx
legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
{
  rtx dest, base, off, pic, tp;
  int type;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      dest = gen_reg_rtx (Pmode);
      tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;

      if (TARGET_64BIT && ! TARGET_GNU2_TLS)
        {
          rtx rax = gen_rtx_REG (Pmode, 0), insns;

          start_sequence ();
          emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
          insns = get_insns ();
          end_sequence ();

          emit_libcall_block (insns, dest, rax, x);
        }
      else if (TARGET_64BIT && TARGET_GNU2_TLS)
        emit_insn (gen_tls_global_dynamic_64 (dest, x));
      else
        emit_insn (gen_tls_global_dynamic_32 (dest, x));

      if (TARGET_GNU2_TLS)
        {
          dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));

          set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
        }
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      base = gen_reg_rtx (Pmode);
      tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;

      if (TARGET_64BIT && ! TARGET_GNU2_TLS)
        {
          rtx rax = gen_rtx_REG (Pmode, 0), insns, note;

          start_sequence ();
          emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
          insns = get_insns ();
          end_sequence ();

          note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
          note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
          emit_libcall_block (insns, base, rax, note);
        }
      else if (TARGET_64BIT && TARGET_GNU2_TLS)
        emit_insn (gen_tls_local_dynamic_base_64 (base));
      else
        emit_insn (gen_tls_local_dynamic_base_32 (base));

      if (TARGET_GNU2_TLS)
        {
          rtx x = ix86_tls_module_base ();

          set_unique_reg_note (get_last_insn (), REG_EQUIV,
                               gen_rtx_MINUS (Pmode, x, tp));
        }

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));

      if (TARGET_GNU2_TLS)
        {
          dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));

          set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
        }

      break;

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_64BIT)
        {
          pic = NULL;
          type = UNSPEC_GOTNTPOFF;
        }
      else if (flag_pic)
        {
          if (reload_in_progress)
            regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
          pic = pic_offset_table_rtx;
          type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
        }
      else if (!TARGET_ANY_GNU_TLS)
        {
          pic = gen_reg_rtx (Pmode);
          emit_insn (gen_set_got (pic));
          type = UNSPEC_GOTTPOFF;
        }
      else
        {
          pic = NULL;
          type = UNSPEC_INDNTPOFF;
        }

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (Pmode, off);
      if (pic)
        off = gen_rtx_PLUS (Pmode, pic, off);
      off = gen_const_mem (Pmode, off);
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
        {
          base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
          off = force_reg (Pmode, off);
          return gen_rtx_PLUS (Pmode, base, off);
        }
      else
        {
          base = get_thread_pointer (true);
          dest = gen_reg_rtx (Pmode);
          emit_insn (gen_subsi3 (dest, base, off));
        }
      break;

    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
                            (TARGET_64BIT || TARGET_ANY_GNU_TLS)
                            ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
        {
          base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
          return gen_rtx_PLUS (Pmode, base, off);
        }
      else
        {
          base = get_thread_pointer (true);
          dest = gen_reg_rtx (Pmode);
          emit_insn (gen_subsi3 (dest, base, off));
        }
      break;

    default:
      gcc_unreachable ();
    }

  return dest;
}
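
/* Illustrative sketch (not from the original source): for a local-exec
   variable `tls_var' with TARGET_ANY_GNU_TLS, the code above returns

       (plus:SI (unspec:SI [(const_int 0)] UNSPEC_TP)
                (const:SI (unspec:SI [(symbol_ref "tls_var")]
                                     UNSPEC_NTPOFF)))

   which can be used directly as a segment-relative memory address.
   The symbol name is a made-up example.  */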

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

rtx
legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
{
  int changed = 0;
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
               GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, log, false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = 1;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
                        GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
          && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
          && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
        {
          changed = 1;
          log = INTVAL (XEXP (XEXP (x, 0), 1));
          XEXP (x, 0) = gen_rtx_MULT (Pmode,
                                      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
                                      GEN_INT (1 << log));
        }

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
          && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
          && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
        {
          changed = 1;
          log = INTVAL (XEXP (XEXP (x, 1), 1));
          XEXP (x, 1) = gen_rtx_MULT (Pmode,
                                      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
                                      GEN_INT (1 << log));
        }

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
        {
          rtx tmp = XEXP (x, 0);
          XEXP (x, 0) = XEXP (x, 1);
          XEXP (x, 1) = tmp;
          changed = 1;
        }

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
         into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
         created by virtual register instantiation, register elimination, and
         similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
        {
          changed = 1;
          x = gen_rtx_PLUS (Pmode,
                            gen_rtx_PLUS (Pmode, XEXP (x, 0),
                                          XEXP (XEXP (x, 1), 0)),
                            XEXP (XEXP (x, 1), 1));
        }

      /* Canonicalize
         (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
         into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
               && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
               && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
               && CONSTANT_P (XEXP (x, 1)))
        {
          rtx constant;
          rtx other = NULL_RTX;

          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            {
              constant = XEXP (x, 1);
              other = XEXP (XEXP (XEXP (x, 0), 1), 1);
            }
          else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
            {
              constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
              other = XEXP (x, 1);
            }
          else
            constant = 0;

          if (constant)
            {
              changed = 1;
              x = gen_rtx_PLUS (Pmode,
                                gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
                                              XEXP (XEXP (XEXP (x, 0), 1), 0)),
                                plus_constant (other, INTVAL (constant)));
            }
        }

      if (changed && legitimate_address_p (mode, x, FALSE))
        return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
        {
          changed = 1;
          XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
        }

      if (GET_CODE (XEXP (x, 1)) == MULT)
        {
          changed = 1;
          XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
        }

      if (changed
          && GET_CODE (XEXP (x, 1)) == REG
          && GET_CODE (XEXP (x, 0)) == REG)
        return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
        {
          changed = 1;
          x = legitimize_pic_address (x, 0);
        }

      if (changed && legitimate_address_p (mode, x, FALSE))
        return x;

      if (GET_CODE (XEXP (x, 0)) == REG)
        {
          rtx temp = gen_reg_rtx (Pmode);
          rtx val  = force_operand (XEXP (x, 1), temp);
          if (val != temp)
            emit_move_insn (temp, val);

          XEXP (x, 1) = temp;
          return x;
        }

      else if (GET_CODE (XEXP (x, 1)) == REG)
        {
          rtx temp = gen_reg_rtx (Pmode);
          rtx val  = force_operand (XEXP (x, 0), temp);
          if (val != temp)
            emit_move_insn (temp, val);

          XEXP (x, 0) = temp;
          return x;
        }
    }

  return x;
}
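
/* Example of the canonicalization above (illustration only): the
   address (plus (ashift (reg) (const_int 2)) (reg)) is rewritten to
   (plus (mult (reg) (const_int 4)) (reg)), which matches the
   hardware's base + index*scale addressing form.  */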

/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      gcc_assert (flag_pic);
      putc ('.', file);
      break;

    case SYMBOL_REF:
      if (! TARGET_MACHO || TARGET_64BIT)
        output_addr_const (file, x);
      else
        {
          const char *name = XSTR (x, 0);

          /* Mark the decl as referenced so that cgraph will output
             the function.  */
          if (SYMBOL_REF_DECL (x))
            mark_decl_referenced (SYMBOL_REF_DECL (x));

#if TARGET_MACHO
          if (MACHOPIC_INDIRECT
              && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
            name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
          assemble_name (file, name);
        }
      if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
        fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
         but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
        {
          /* We can use %d if the number is <32 bits and positive.  */
          if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
            fprintf (file, "0x%lx%08lx",
                     (unsigned long) CONST_DOUBLE_HIGH (x),
                     (unsigned long) CONST_DOUBLE_LOW (x));
          else
            fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
        }
      else
        /* We can't handle floating point constants;
           PRINT_OPERAND must handle them.  */
        output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
        {
          output_pic_addr_const (file, XEXP (x, 0), code);
          putc ('+', file);
          output_pic_addr_const (file, XEXP (x, 1), code);
        }
      else
        {
          gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
          output_pic_addr_const (file, XEXP (x, 1), code);
          putc ('+', file);
          output_pic_addr_const (file, XEXP (x, 0), code);
        }
      break;

    case MINUS:
      if (!TARGET_MACHO)
        putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
        putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      gcc_assert (XVECLEN (x, 0) == 1);
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
        {
        case UNSPEC_GOT:
          fputs ("@GOT", file);
          break;
        case UNSPEC_GOTOFF:
          fputs ("@GOTOFF", file);
          break;
        case UNSPEC_GOTPCREL:
          fputs ("@GOTPCREL(%rip)", file);
          break;
        case UNSPEC_GOTTPOFF:
          /* FIXME: This might be @TPOFF in Sun ld too.  */
          fputs ("@GOTTPOFF", file);
          break;
        case UNSPEC_TPOFF:
          fputs ("@TPOFF", file);
          break;
        case UNSPEC_NTPOFF:
          if (TARGET_64BIT)
            fputs ("@TPOFF", file);
          else
            fputs ("@NTPOFF", file);
          break;
        case UNSPEC_DTPOFF:
          fputs ("@DTPOFF", file);
          break;
        case UNSPEC_GOTNTPOFF:
          if (TARGET_64BIT)
            fputs ("@GOTTPOFF(%rip)", file);
          else
            fputs ("@GOTNTPOFF", file);
          break;
        case UNSPEC_INDNTPOFF:
          fputs ("@INDNTPOFF", file);
          break;
        default:
          output_operand_lossage ("invalid UNSPEC as operand");
          break;
        }
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
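
/* For example (illustration only), given the rtx
   (const (unspec [(symbol_ref "sym")] UNSPEC_GOTOFF)) this routine
   prints `sym@GOTOFF', and the difference of two labels prints
   bracketed in AT&T syntax, e.g. `[.L2-.L1]' (symbol and label names
   made up).  */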

/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void
i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@DTPOFF", file);
  switch (size)
    {
    case 4:
      break;
    case 8:
      fputs (", 0", file);
      break;
    default:
      gcc_unreachable ();
    }
}
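
/* For a 4-byte DTP-relative reference to `sym', and assuming ASM_LONG
   expands to the .long directive (illustration only), this emits

       .long sym@DTPOFF

   and for an 8-byte reference the same line with ", 0" appended to
   supply the upper half.  */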

/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.

   On Darwin, this is necessary to avoid a crash, because Darwin
   has a different PIC label for each routine but the DWARF debugging
   information is not associated with any particular routine, so it's
   necessary to remove references to the PIC label from RTL stored by
   the DWARF output code.  */

static rtx
ix86_delegitimize_address (rtx orig_x)
{
  rtx x = orig_x;
  /* reg_addend is NULL or a multiple of some register.  */
  rtx reg_addend = NULL_RTX;
  /* const_addend is NULL or a const_int.  */
  rtx const_addend = NULL_RTX;
  /* This is the result, or NULL.  */
  rtx result = NULL_RTX;

  if (GET_CODE (x) == MEM)
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST
          || GET_CODE (XEXP (x, 0)) != UNSPEC
          || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
          || GET_CODE (orig_x) != MEM)
        return orig_x;
      return XVECEXP (XEXP (x, 0), 0, 0);
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return orig_x;

  if (GET_CODE (XEXP (x, 0)) == REG
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
    /* %ebx + GOT/GOTOFF */
    ;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      reg_addend = XEXP (x, 0);
      if (GET_CODE (XEXP (reg_addend, 0)) == REG
          && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
        reg_addend = XEXP (reg_addend, 1);
      else if (GET_CODE (XEXP (reg_addend, 1)) == REG
               && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
        reg_addend = XEXP (reg_addend, 0);
      else
        return orig_x;
      if (GET_CODE (reg_addend) != REG
          && GET_CODE (reg_addend) != MULT
          && GET_CODE (reg_addend) != ASHIFT)
        return orig_x;
    }
  else
    return orig_x;

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT)
    {
      const_addend = XEXP (x, 1);
      x = XEXP (x, 0);
    }

  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
          || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
    result = XVECEXP (x, 0, 0);

  if (TARGET_MACHO && darwin_local_data_pic (x)
      && GET_CODE (orig_x) != MEM)
    result = XEXP (x, 0);

  if (! result)
    return orig_x;

  if (const_addend)
    result = gen_rtx_PLUS (Pmode, result, const_addend);
  if (reg_addend)
    result = gen_rtx_PLUS (Pmode, reg_addend, result);
  return result;
}
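
/* Illustration (not in the original source): on ia32 this turns, e.g.,

       (plus:SI (reg:SI %ebx)
                (const:SI (unspec:SI [(symbol_ref "sym")] UNSPEC_GOTOFF)))

   back into plain (symbol_ref "sym"), undoing legitimize_pic_address;
   the symbol name is a made-up example.  */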

static void
put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
                    int fp, FILE *file)
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;
    case NE:
      suffix = "ne";
      break;
    case GT:
      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
         Those same assemblers have the same but opposite lossage on cmov.  */
      gcc_assert (mode == CCmode);
      suffix = fp ? "nbe" : "a";
      break;
    case LT:
      switch (mode)
        {
        case CCNOmode:
        case CCGOCmode:
          suffix = "s";
          break;

        case CCmode:
        case CCGCmode:
          suffix = "l";
          break;

        default:
          gcc_unreachable ();
        }
      break;
    case LTU:
      gcc_assert (mode == CCmode);
      suffix = "b";
      break;
    case GE:
      switch (mode)
        {
        case CCNOmode:
        case CCGOCmode:
          suffix = "ns";
          break;

        case CCmode:
        case CCGCmode:
          suffix = "ge";
          break;

        default:
          gcc_unreachable ();
        }
      break;
    case GEU:
      /* ??? As above.  */
      gcc_assert (mode == CCmode);
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
      suffix = "le";
      break;
    case LEU:
      gcc_assert (mode == CCmode);
      suffix = "be";
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      gcc_unreachable ();
    }
  fputs (suffix, file);
}
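
/* Example (illustration only): for (gt (reg:CCGC flags) (const_int 0))
   with REVERSE and FP both clear this prints "g", so an output
   template such as "set%C0\t%b0" would assemble to `setg %al' (operand
   and register chosen arbitrarily).  */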

/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'h', pretend the reg is the 'high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.  */

void
print_reg (rtx x, int code, FILE *file)
{
  gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
              && REGNO (x) != FRAME_POINTER_REGNUM
              && REGNO (x) != FLAGS_REG
              && REGNO (x) != FPSR_REG);

  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use a different naming convention
     from the normal registers.  */
  if (REX_INT_REG_P (x))
    {
      gcc_assert (TARGET_64BIT);
      switch (code)
        {
          case 0:
            error ("extended registers have no high halves");
            break;
          case 1:
            fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
            break;
          case 2:
            fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
            break;
          case 4:
            fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
            break;
          case 8:
            fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
            break;
          default:
            error ("unsupported operand size for extended register");
            break;
        }
      return;
    }
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
        {
          fputs ("st(0)", file);
          break;
        }
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      if (! ANY_FP_REG_P (x))
        putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
    normal:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:
      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
        goto normal;
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:
      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
        goto normal;
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    default:
      gcc_unreachable ();
    }
}
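
/* Illustration: in the AT&T dialect, hard register ax prints as `%al'
   under code 'b' and `%ax' under code 'w', while the first REX
   register under code 4 prints as `%r8d', following the r8..r15
   naming used for the extended registers.  */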

/* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   pattern.  */

static const char *
get_some_local_dynamic_name (void)
{
  rtx insn;

  if (cfun->machine->some_ld_name)
    return cfun->machine->some_ld_name;

  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
    if (INSN_P (insn)
        && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
      return cfun->machine->some_ld_name;

  gcc_unreachable ();
}

static int
get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *px;

  if (GET_CODE (x) == SYMBOL_REF
      && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
    {
      cfun->machine->some_ld_name = XSTR (x, 0);
      return 1;
    }

  return 0;
}

/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
        otherwise nothing
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assembler's argument
        delimiter.
   b -- print the QImode name of the register for the indicated operand.
        %b0 would print %al if operands[0] is reg 0.
   w --  likewise, print the HImode name of the register.
   k --  likewise, print the SImode name of the register.
   q --  likewise, print the DImode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
   H -- print a memory address offset by 8; used for sse high-parts
 */

void
print_operand (FILE *file, rtx x, int code)
{
  if (code)
    {
      switch (code)
        {
        case '*':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('*', file);
          return;

        case '&':
          assemble_name (file, get_some_local_dynamic_name ());
          return;

        case 'A':
          switch (ASSEMBLER_DIALECT)
            {
            case ASM_ATT:
              putc ('*', file);
              break;

            case ASM_INTEL:
              /* Intel syntax.  For absolute addresses, registers should not
                 be surrounded by braces.  */
              if (GET_CODE (x) != REG)
                {
                  putc ('[', file);
                  PRINT_OPERAND (file, x, 0);
                  putc (']', file);
                  return;
                }
              break;

            default:
              gcc_unreachable ();
            }

          PRINT_OPERAND (file, x, 0);
          return;

        case 'L':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('l', file);
          return;

        case 'W':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('w', file);
          return;

        case 'B':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('b', file);
          return;

        case 'Q':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('l', file);
          return;

        case 'S':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('s', file);
          return;

        case 'T':
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('t', file);
          return;

        case 'z':
          /* 387 opcodes don't get size suffixes if the operands are
             registers.  */
          if (STACK_REG_P (x))
            return;

          /* Likewise if using Intel opcodes.  */
          if (ASSEMBLER_DIALECT == ASM_INTEL)
            return;

          /* This is the size of op from size of operand.  */
          switch (GET_MODE_SIZE (GET_MODE (x)))
            {
            case 2:
#ifdef HAVE_GAS_FILDS_FISTS
              putc ('s', file);
#endif
              return;

            case 4:
              if (GET_MODE (x) == SFmode)
                {
                  putc ('s', file);
                  return;
                }
              else
                putc ('l', file);
              return;

            case 12:
            case 16:
              putc ('t', file);
              return;

            case 8:
              if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
                {
#ifdef GAS_MNEMONICS
                  putc ('q', file);
#else
                  putc ('l', file);
                  putc ('l', file);
#endif
                }
              else
                putc ('l', file);
              return;

            default:
              gcc_unreachable ();
            }

        case 'b':
        case 'w':
        case 'k':
        case 'q':
        case 'h':
        case 'y':
        case 'X':
        case 'P':
          break;

        case 's':
          if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
            {
              PRINT_OPERAND (file, x, 0);
              putc (',', file);
            }
          return;

        case 'D':
          /* A little bit of braindamage here.  The SSE compare
             instructions use completely different names for the
             comparisons than the fp conditional moves do.  */
          switch (GET_CODE (x))
            {
            case EQ:
            case UNEQ:
              fputs ("eq", file);
              break;
            case LT:
            case UNLT:
              fputs ("lt", file);
              break;
            case LE:
            case UNLE:
              fputs ("le", file);
              break;
            case UNORDERED:
              fputs ("unord", file);
              break;
            case NE:
            case LTGT:
              fputs ("neq", file);
              break;
            case UNGE:
            case GE:
              fputs ("nlt", file);
              break;
            case UNGT:
            case GT:
              fputs ("nle", file);
              break;
            case ORDERED:
              fputs ("ord", file);
              break;
            default:
              gcc_unreachable ();
            }
          return;
        case 'O':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
          if (ASSEMBLER_DIALECT == ASM_ATT)
            {
              switch (GET_MODE (x))
                {
                case HImode: putc ('w', file); break;
                case SImode:
                case SFmode: putc ('l', file); break;
                case DImode:
                case DFmode: putc ('q', file); break;
                default: gcc_unreachable ();
                }
              putc ('.', file);
            }
#endif
          return;
        case 'C':
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
          return;
        case 'F':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('.', file);
#endif
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
          return;

          /* Like above, but reverse condition */
        case 'c':
          /* Check to see if argument to %c is really a constant
             and not a condition code which needs to be reversed.  */
          if (!COMPARISON_P (x))
            {
              output_operand_lossage ("operand is neither a constant nor a "
                                      "condition code, invalid operand "
                                      "code 'c'");
              return;
            }
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
          return;
        case 'f':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
          if (ASSEMBLER_DIALECT == ASM_ATT)
            putc ('.', file);
#endif
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
          return;

        case 'H':
          /* It doesn't actually matter what mode we use here, as we're
             only going to use this for printing.  */
          x = adjust_address_nv (x, DImode, 8);
          break;

        case '+':
          {
            rtx x;

            if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
              return;

            x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
            if (x)
              {
                int pred_val = INTVAL (XEXP (x, 0));

                if (pred_val < REG_BR_PROB_BASE * 45 / 100
                    || pred_val > REG_BR_PROB_BASE * 55 / 100)
                  {
                    int taken = pred_val > REG_BR_PROB_BASE / 2;
                    int cputaken = final_forward_branch_p (current_output_insn) == 0;

                    /* Emit hints only in the case default branch prediction
                       heuristics would fail.  */
                    if (taken != cputaken)
                      {
                        /* We use 3e (DS) prefix for taken branches and
                           2e (CS) prefix for not taken branches.  */
                        if (taken)
                          fputs ("ds ; ", file);
                        else
                          fputs ("cs ; ", file);
                      }
                  }
              }
            return;
          }
        default:
          output_operand_lossage ("invalid operand code '%c'", code);
        }
    }

  if (GET_CODE (x) == REG)
    print_reg (x, code, file);

  else if (GET_CODE (x) == MEM)
    {
      /* No `byte ptr' prefix for call instructions.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
        {
          const char * size;
          switch (GET_MODE_SIZE (GET_MODE (x)))
            {
            case 1: size = "BYTE"; break;
            case 2: size = "WORD"; break;
            case 4: size = "DWORD"; break;
            case 8: size = "QWORD"; break;
            case 12: size = "XWORD"; break;
            case 16: size = "XMMWORD"; break;
            default:
              gcc_unreachable ();
            }

          /* Check for explicit size override (codes 'b', 'w' and 'k')  */
          if (code == 'b')
            size = "BYTE";
          else if (code == 'w')
            size = "WORD";
          else if (code == 'k')
            size = "DWORD";

          fputs (size, file);
          fputs (" PTR ", file);
        }

      x = XEXP (x, 0);
      /* Avoid (%rip) for call operands.  */
      if (CONSTANT_ADDRESS_P (x) && code == 'P'
          && GET_CODE (x) != CONST_INT)
        output_addr_const (file, x);
      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
        output_operand_lossage ("invalid constraints for operand");
      else
        output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
        putc ('$', file);
      fprintf (file, "0x%08lx", l);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fprintf (file, "%s", dstr);
    }

  else if (GET_CODE (x) == CONST_DOUBLE
           && GET_MODE (x) == XFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fprintf (file, "%s", dstr);
    }

  else
    {
      /* We have patterns that allow zero sets of memory, for instance.
         In 64-bit mode, we should probably support all 8-byte vectors,
         since we can in fact encode that into an immediate.  */
      if (GET_CODE (x) == CONST_VECTOR)
        {
          gcc_assert (x == CONST0_RTX (GET_MODE (x)));
          x = const0_rtx;
        }

      if (code != 'P')
        {
          if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
            {
              if (ASSEMBLER_DIALECT == ASM_ATT)
                putc ('$', file);
            }
          else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
                   || GET_CODE (x) == LABEL_REF)
            {
              if (ASSEMBLER_DIALECT == ASM_ATT)
                putc ('$', file);
              else
                fputs ("OFFSET FLAT:", file);
            }
        }
      if (GET_CODE (x) == CONST_INT)
        fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic)
        output_pic_addr_const (file, x, code);
      else
        output_addr_const (file, x);
    }
}
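
/* Usage sketch (illustration only): in an output template such as
   "mov%z0\t{%1, %0|%0, %1}", the 'z' case above picks the suffix from
   the mode of operand 0, so an SImode move prints as `movl' in AT&T
   syntax and as plain `mov' in Intel syntax, where no suffix is
   emitted.  */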

/* Print a memory operand whose address is ADDR.  */

void
print_operand_address (FILE *file, rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;
  int ok = ix86_decompose_address (addr, &parts);

  gcc_assert (ok);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  switch (parts.seg)
    {
    case SEG_DEFAULT:
      break;
    case SEG_FS:
    case SEG_GS:
      if (USER_LABEL_PREFIX[0] == 0)
        putc ('%', file);
      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
      break;
    default:
      gcc_unreachable ();
    }

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (GET_CODE (disp) == CONST_INT)
        {
          if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
            {
              if (USER_LABEL_PREFIX[0] == 0)
                putc ('%', file);
              fputs ("ds:", file);
            }
          fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
        }
      else if (flag_pic)
        output_pic_addr_const (file, disp, 0);
      else
        output_addr_const (file, disp);

      /* Use the one byte shorter RIP-relative addressing for 64-bit mode.  */
      if (TARGET_64BIT)
        {
          if (GET_CODE (disp) == CONST
              && GET_CODE (XEXP (disp, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
            disp = XEXP (XEXP (disp, 0), 0);
          if (GET_CODE (disp) == LABEL_REF
              || (GET_CODE (disp) == SYMBOL_REF
                  && SYMBOL_REF_TLS_MODEL (disp) == 0))
            fputs ("(%rip)", file);
        }
    }
  else
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
        {
          if (disp)
            {
              if (flag_pic)
                output_pic_addr_const (file, disp, 0);
              else if (GET_CODE (disp) == LABEL_REF)
                output_asm_label (disp);
              else
                output_addr_const (file, disp);
            }

          putc ('(', file);
          if (base)
            print_reg (base, 0, file);
          if (index)
            {
              putc (',', file);
              print_reg (index, 0, file);
              if (scale != 1)
                fprintf (file, ",%d", scale);
            }
          putc (')', file);
        }
      else
        {
          rtx offset = NULL_RTX;

          if (disp)
            {
              /* Pull out the offset of a symbol; print any symbol itself.  */
              if (GET_CODE (disp) == CONST
                  && GET_CODE (XEXP (disp, 0)) == PLUS
                  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
                {
                  offset = XEXP (XEXP (disp, 0), 1);
                  disp = gen_rtx_CONST (VOIDmode,
                                        XEXP (XEXP (disp, 0), 0));
                }

              if (flag_pic)
                output_pic_addr_const (file, disp, 0);
              else if (GET_CODE (disp) == LABEL_REF)
                output_asm_label (disp);
              else if (GET_CODE (disp) == CONST_INT)
                offset = disp;
              else
                output_addr_const (file, disp);
            }

          putc ('[', file);
          if (base)
            {
              print_reg (base, 0, file);
              if (offset)
                {
                  if (INTVAL (offset) >= 0)
                    putc ('+', file);
                  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
                }
            }
          else if (offset)
            fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
          else
            putc ('0', file);

          if (index)
            {
              putc ('+', file);
              print_reg (index, 0, file);
              if (scale != 1)
                fprintf (file, "*%d", scale);
            }
          putc (']', file);
        }
    }
}
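
/* Illustration: an address with base %ebx, index %eax, scale 4 and
   displacement 16 prints as `16(%ebx,%eax,4)' in AT&T syntax and as
   `[ebx+16+eax*4]' in Intel syntax (register choices arbitrary).  */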

bool
output_addr_const_extra (FILE *file, rtx x)
{
  rtx op;

  if (GET_CODE (x) != UNSPEC)
    return false;

  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.  */
      fputs ("@GOTTPOFF", file);
      break;
    case UNSPEC_TPOFF:
      output_addr_const (file, op);
      fputs ("@TPOFF", file);
      break;
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
        fputs ("@TPOFF", file);
      else
        fputs ("@NTPOFF", file);
      break;
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@DTPOFF", file);
      break;
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
        fputs ("@GOTTPOFF(%rip)", file);
      else
        fputs ("@GOTNTPOFF", file);
      break;
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs ("@INDNTPOFF", file);
      break;

    default:
      return false;
    }

  return true;
}

/* Split one or more DImode RTL references into pairs of SImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
{
  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses,
         but we still have to handle them.  */
      if (GET_CODE (op) == MEM)
        {
          lo_half[num] = adjust_address (op, SImode, 0);
          hi_half[num] = adjust_address (op, SImode, 4);
        }
      else
        {
          lo_half[num] = simplify_gen_subreg (SImode, op,
                                              GET_MODE (op) == VOIDmode
                                              ? DImode : GET_MODE (op), 0);
          hi_half[num] = simplify_gen_subreg (SImode, op,
                                              GET_MODE (op) == VOIDmode
                                              ? DImode : GET_MODE (op), 4);
        }
    }
}
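
/* Example (illustration only): splitting the DImode constant
   0x100000000 yields lo_half = (const_int 0) and hi_half =
   (const_int 1), while a DImode MEM at address A is split into SImode
   MEMs at A and A+4 (little-endian low word first).  */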

/* Split one or more TImode RTL references into pairs of DImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
{
  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses,
         but we still have to handle them.  */
      if (GET_CODE (op) == MEM)
        {
          lo_half[num] = adjust_address (op, DImode, 0);
          hi_half[num] = adjust_address (op, DImode, 8);
        }
      else
        {
          lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
          hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
        }
    }
}
8305
 
8306
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
8307
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
8308
   is the expression of the binary operation.  The output may either be
8309
   emitted here, or returned to the caller, like all output_* functions.
8310
 
8311
   There is no guarantee that the operands are the same mode, as they
8312
   might be within FLOAT or FLOAT_EXTEND expressions.  */
8313
 
8314
#ifndef SYSV386_COMPAT
8315
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
8316
   wants to fix the assemblers because that causes incompatibility
8317
   with gcc.  No-one wants to fix gcc because that causes
8318
   incompatibility with assemblers...  You can use the option of
8319
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
8320
#define SYSV386_COMPAT 1
8321
#endif
8322
 
8323
const char *
8324
output_387_binary_op (rtx insn, rtx *operands)
8325
{
8326
  static char buf[30];
8327
  const char *p;
8328
  const char *ssep;
8329
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8330
 
8331
#ifdef ENABLE_CHECKING
8332
  /* Even if we do not want to check the inputs, this documents input
8333
     constraints.  Which helps in understanding the following code.  */
8334
  if (STACK_REG_P (operands[0])
8335
      && ((REG_P (operands[1])
8336
           && REGNO (operands[0]) == REGNO (operands[1])
8337
           && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8338
          || (REG_P (operands[2])
8339
              && REGNO (operands[0]) == REGNO (operands[2])
8340
              && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8341
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8342
    ; /* ok */
8343
  else
8344
    gcc_assert (is_sse);
8345
#endif
8346
 
8347
  switch (GET_CODE (operands[3]))
8348
    {
8349
    case PLUS:
8350
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8351
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8352
        p = "fiadd";
8353
      else
8354
        p = "fadd";
8355
      ssep = "add";
8356
      break;
8357
 
8358
    case MINUS:
8359
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8360
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8361
        p = "fisub";
8362
      else
8363
        p = "fsub";
8364
      ssep = "sub";
8365
      break;
8366
 
8367
    case MULT:
8368
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8369
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8370
        p = "fimul";
8371
      else
8372
        p = "fmul";
8373
      ssep = "mul";
8374
      break;
8375
 
8376
    case DIV:
8377
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8378
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fidiv";
      else
        p = "fdiv";
      ssep = "div";
      break;

    default:
      gcc_unreachable ();
    }

  if (is_sse)
    {
      strcpy (buf, ssep);
      if (GET_MODE (operands[0]) == SFmode)
        strcat (buf, "ss\t{%2, %0|%0, %2}");
      else
        strcat (buf, "sd\t{%2, %0|%0, %2}");
      return buf;
    }
  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
        {
          rtx temp = operands[2];
          operands[2] = operands[1];
          operands[1] = temp;
        }

      /* We now know that operands[0] == operands[1].  */

      if (GET_CODE (operands[2]) == MEM)
        {
          p = "%z2\t%2";
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
        {
          if (STACK_TOP_P (operands[0]))
            /* How is it that we are storing to a dead operand[2]?
               Well, presumably operands[1] is dead too.  We can't
               store the result to st(0) as st(0) gets popped on this
               instruction.  Instead store to operands[2] (which I
               think has to be st(1)).  st(1) will be popped later.
               gcc <= 2.8.1 didn't have this check and generated
               assembly code that the Unixware assembler rejected.  */
            p = "p\t{%0, %2|%2, %0}";   /* st(1) = st(0) op st(1); pop */
          else
            p = "p\t{%2, %0|%0, %2}";   /* st(r1) = st(r1) op st(0); pop */
          break;
        }

      if (STACK_TOP_P (operands[0]))
        p = "\t{%y2, %0|%0, %y2}";      /* st(0) = st(0) op st(r2) */
      else
        p = "\t{%2, %0|%0, %2}";        /* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      if (GET_CODE (operands[1]) == MEM)
        {
          p = "r%z1\t%1";
          break;
        }

      if (GET_CODE (operands[2]) == MEM)
        {
          p = "%z2\t%2";
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
        {
#if SYSV386_COMPAT
          /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
             derived assemblers, confusingly reverse the direction of
             the operation for fsub{r} and fdiv{r} when the
             destination register is not st(0).  The Intel assembler
             doesn't have this brain damage.  Read !SYSV386_COMPAT to
             figure out what the hardware really does.  */
          if (STACK_TOP_P (operands[0]))
            p = "{p\t%0, %2|rp\t%2, %0}";
          else
            p = "{rp\t%2, %0|p\t%0, %2}";
#else
          if (STACK_TOP_P (operands[0]))
            /* As above for fmul/fadd, we can't store to st(0).  */
            p = "rp\t{%0, %2|%2, %0}";  /* st(1) = st(0) op st(1); pop */
          else
            p = "p\t{%2, %0|%0, %2}";   /* st(r1) = st(r1) op st(0); pop */
#endif
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
        {
#if SYSV386_COMPAT
          if (STACK_TOP_P (operands[0]))
            p = "{rp\t%0, %1|p\t%1, %0}";
          else
            p = "{p\t%1, %0|rp\t%0, %1}";
#else
          if (STACK_TOP_P (operands[0]))
            p = "p\t{%0, %1|%1, %0}";   /* st(1) = st(1) op st(0); pop */
          else
            p = "rp\t{%1, %0|%0, %1}";  /* st(r2) = st(0) op st(r2); pop */
#endif
          break;
        }

      if (STACK_TOP_P (operands[0]))
        {
          if (STACK_TOP_P (operands[1]))
            p = "\t{%y2, %0|%0, %y2}";  /* st(0) = st(0) op st(r2) */
          else
            p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
          break;
        }
      else if (STACK_TOP_P (operands[1]))
        {
#if SYSV386_COMPAT
          p = "{\t%1, %0|r\t%0, %1}";
#else
          p = "r\t{%1, %0|%0, %1}";     /* st(r2) = st(0) op st(r2) */
#endif
        }
      else
        {
#if SYSV386_COMPAT
          p = "{r\t%2, %0|\t%0, %2}";
#else
          p = "\t{%2, %0|%0, %2}";      /* st(r1) = st(r1) op st(0) */
#endif
        }
      break;

    default:
      gcc_unreachable ();
    }

  strcat (buf, p);
  return buf;
}
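
/* A note on the templates assembled above: in an output template,
   "{a|b}" selects "a" when emitting AT&T syntax and "b" for Intel
   syntax.  So appending "p\t{%2, %0|%0, %2}" to "fadd" produces
   roughly "faddp %st, %st(1)" in AT&T output and "faddp st(1), st"
   in Intel output -- the same instruction, with the operand order
   flipped to match each assembler's convention.  */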

/* Return needed mode for entity in optimize_mode_switching pass.  */

int
ix86_mode_needed (int entity, rtx insn)
{
  enum attr_i387_cw mode;

  /* The mode UNINITIALIZED is used to store the control word after a
     function call or ASM pattern.  The mode ANY specifies that the insn
     has no requirements on the control word and makes no changes to the
     bits we are interested in.  */

  if (CALL_P (insn)
      || (NONJUMP_INSN_P (insn)
          && (asm_noperands (PATTERN (insn)) >= 0
              || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
    return I387_CW_UNINITIALIZED;

  if (recog_memoized (insn) < 0)
    return I387_CW_ANY;

  mode = get_attr_i387_cw (insn);

  switch (entity)
    {
    case I387_TRUNC:
      if (mode == I387_CW_TRUNC)
        return mode;
      break;

    case I387_FLOOR:
      if (mode == I387_CW_FLOOR)
        return mode;
      break;

    case I387_CEIL:
      if (mode == I387_CW_CEIL)
        return mode;
      break;

    case I387_MASK_PM:
      if (mode == I387_CW_MASK_PM)
        return mode;
      break;

    default:
      gcc_unreachable ();
    }

  return I387_CW_ANY;
}

/* Output code to initialize the control word copies used by the
   trunc?f?i and rounding patterns.  MODE selects which rounding mode
   (or precision-mask setting) to encode; the adjusted control word is
   stored in the stack slot corresponding to that mode.  */

void
emit_i387_cw_initialization (int mode)
{
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  rtx new_mode;
  rtx reg = gen_reg_rtx (HImode);
  int slot;

  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, stored_mode);

  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
    {
      switch (mode)
        {
        case I387_CW_TRUNC:
          /* round toward zero (truncate) */
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
          slot = SLOT_CW_TRUNC;
          break;

        case I387_CW_FLOOR:
          /* round down toward -oo */
          emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
          slot = SLOT_CW_FLOOR;
          break;

        case I387_CW_CEIL:
          /* round up toward +oo */
          emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
          slot = SLOT_CW_CEIL;
          break;

        case I387_CW_MASK_PM:
          /* mask precision exception for nearbyint() */
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
          slot = SLOT_CW_MASK_PM;
          break;

        default:
          gcc_unreachable ();
        }
    }
  else
    {
      switch (mode)
        {
        case I387_CW_TRUNC:
          /* round toward zero (truncate) */
          emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
          slot = SLOT_CW_TRUNC;
          break;

        case I387_CW_FLOOR:
          /* round down toward -oo */
          emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
          slot = SLOT_CW_FLOOR;
          break;

        case I387_CW_CEIL:
          /* round up toward +oo */
          emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
          slot = SLOT_CW_CEIL;
          break;

        case I387_CW_MASK_PM:
          /* mask precision exception for nearbyint() */
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
          slot = SLOT_CW_MASK_PM;
          break;

        default:
          gcc_unreachable ();
        }
    }

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
}
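
#if 0
/* A minimal standalone sketch, assuming the standard x87 control-word
   layout (bits 10-11 hold the rounding control, bit 5 masks the
   precision exception), of the word manipulation performed by the
   insns emitted above:  */
static unsigned short
i387_cw_trunc (unsigned short cw)
{
  return cw | 0x0c00;                   /* RC = 11: round toward zero.  */
}

static unsigned short
i387_cw_floor (unsigned short cw)
{
  return (cw & ~0x0c00) | 0x0400;       /* RC = 01: round toward -inf.  */
}

static unsigned short
i387_cw_ceil (unsigned short cw)
{
  return (cw & ~0x0c00) | 0x0800;       /* RC = 10: round toward +inf.  */
}

static unsigned short
i387_cw_mask_pm (unsigned short cw)
{
  return cw | 0x0020;                   /* PM = 1: mask the precision trap.  */
}
#endif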

/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (rtx insn, rtx *operands, int fisttp)
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (GET_CODE (operands[0]) == MEM);

  if (fisttp)
    output_asm_insn ("fisttp%z0\t%0", operands);
  else
    {
      if (round_mode != I387_CW_ANY)
        output_asm_insn ("fldcw\t%3", operands);
      if (stack_top_dies || dimode_p)
        output_asm_insn ("fistp%z0\t%0", operands);
      else
        output_asm_insn ("fist%z0\t%0", operands);
      if (round_mode != I387_CW_ANY)
        output_asm_insn ("fldcw\t%2", operands);
    }

  return "";
}
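
/* For example, a DFmode-to-SImode truncation with a non-ANY rounding
   mode comes out roughly as

        fldcw   %3              ; load the truncating control word
        fistp   %0              ; convert (and pop st(0))
        fldcw   %2              ; restore the caller's control word

   whereas the SSE3 fisttp form truncates regardless of the rounding
   control and needs no fldcw pair.  */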

/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  */

static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  if (TARGET_USE_FFREEP)
#if HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
#else
    switch (REGNO (operands[opno]))
      {
      case FIRST_STACK_REG + 0: return ".word\t0xc0df";
      case FIRST_STACK_REG + 1: return ".word\t0xc1df";
      case FIRST_STACK_REG + 2: return ".word\t0xc2df";
      case FIRST_STACK_REG + 3: return ".word\t0xc3df";
      case FIRST_STACK_REG + 4: return ".word\t0xc4df";
      case FIRST_STACK_REG + 5: return ".word\t0xc5df";
      case FIRST_STACK_REG + 6: return ".word\t0xc6df";
      case FIRST_STACK_REG + 7: return ".word\t0xc7df";
      }
#endif

  return opno ? "fstp\t%y1" : "fstp\t%y0";
}
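
/* The raw .word forms above hand-assemble "ffreep %st(i)": the ffreep
   opcode is DF C0+i, and x86 is little-endian, so e.g. st(3) becomes
   the bytes DF C3, emitted as ".word 0xc3df".  This keeps the insn
   usable with assemblers that predate the ffreep mnemonic.  */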

/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.  */

const char *
output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0, cmp_op1;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);

  if (eflags_p)
    {
      cmp_op0 = operands[0];
      cmp_op1 = operands[1];
    }
  else
    {
      cmp_op0 = operands[1];
      cmp_op1 = operands[2];
    }

  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
        {
          if (unordered_p)
            return "ucomiss\t{%1, %0|%0, %1}";
          else
            return "comiss\t{%1, %0|%0, %1}";
        }
      else
        {
          if (unordered_p)
            return "ucomisd\t{%1, %0|%0, %1}";
          else
            return "comisd\t{%1, %0|%0, %1}";
        }
    }

  gcc_assert (STACK_TOP_P (cmp_op0));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
    {
      if (stack_top_dies)
        {
          output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
          return output_387_ffreep (operands, 1);
        }
      else
        return "ftst\n\tfnstsw\t%0";
    }

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If the top of the 387 stack dies, and the other operand is
         also a stack register that dies, then this must be a
         `fcompp' float compare.  */

      if (eflags_p)
        {
          /* There is no double popping fcomi variant.  Fortunately,
             eflags is immune from the fstp's cc clobbering.  */
          if (unordered_p)
            output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
          else
            output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
          return output_387_ffreep (operands, 0);
        }
      else
        {
          if (unordered_p)
            return "fucompp\n\tfnstsw\t%0";
          else
            return "fcompp\n\tfnstsw\t%0";
        }
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[16] =
      {
        "fcom%z2\t%y2\n\tfnstsw\t%0",
        "fcomp%z2\t%y2\n\tfnstsw\t%0",
        "fucom%z2\t%y2\n\tfnstsw\t%0",
        "fucomp%z2\t%y2\n\tfnstsw\t%0",

        "ficom%z2\t%y2\n\tfnstsw\t%0",
        "ficomp%z2\t%y2\n\tfnstsw\t%0",
        NULL,
        NULL,

        "fcomi\t{%y1, %0|%0, %y1}",
        "fcomip\t{%y1, %0|%0, %y1}",
        "fucomi\t{%y1, %0|%0, %y1}",
        "fucomip\t{%y1, %0|%0, %y1}",

        NULL,
        NULL,
        NULL,
        NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      gcc_assert (mask < 16);
      ret = alt[mask];
      gcc_assert (ret);

      return ret;
    }
}
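
/* A worked example of the alt[] indexing above: fcomi available
   (eflags_p = 1), fp operand (intmode = 0), unordered compare
   (unordered_p = 1), stack top dies (stack_top_dies = 1) gives
   mask = 8 + 0 + 2 + 1 = 11, selecting "fucomip\t{%y1, %0|%0, %y1}".
   The NULL slots (e.g. masks 6 and 7, an unordered integer ficom) are
   combinations no pattern should ever produce, hence the assert.  */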

void
ix86_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}

void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  if (TARGET_64BIT)
    fprintf (file, "%s%s%d-%s%d\n",
             ASM_LONG, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
      machopic_output_function_base_name (file);
      fprintf (file, "\n");
    }
#endif
  else
    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
                 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
}

/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

void
ix86_expand_clear (rtx dest)
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  gcc_assert (reload_completed);

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));

  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
    {
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}
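
/* The size motivation for the xor form, for the record:

        31 c0                   xor %eax,%eax       2 bytes
        b8 00 00 00 00          mov $0x0,%eax       5 bytes

   The xor clobbers the flags, which is why a CLOBBER of the flags
   register is attached to the parallel above.  */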

/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

rtx
maybe_get_pool_constant (rtx x)
{
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}

void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  int strict = (reload_in_progress || reload_completed);
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  if (GET_CODE (op1) == SYMBOL_REF)
    {
      model = SYMBOL_REF_TLS_MODEL (op1);
      if (model)
        {
          op1 = legitimize_tls_address (op1, model, true);
          op1 = force_operand (op1, op0);
          if (op1 == op0)
            return;
        }
    }
  else if (GET_CODE (op1) == CONST
           && GET_CODE (XEXP (op1, 0)) == PLUS
           && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
    {
      model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
      if (model)
        {
          rtx addend = XEXP (XEXP (op1, 0), 1);
          op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
          op1 = force_operand (op1, NULL);
          op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
                                     op0, 1, OPTAB_DIRECT);
          if (op1 == op0)
            return;
        }
    }

  if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
    {
      if (TARGET_MACHO && !TARGET_64BIT)
        {
#if TARGET_MACHO
          if (MACHOPIC_PURE)
            {
              rtx temp = ((reload_in_progress
                           || ((op0 && GET_CODE (op0) == REG)
                               && mode == Pmode))
                          ? op0 : gen_reg_rtx (Pmode));
              op1 = machopic_indirect_data_reference (op1, temp);
              op1 = machopic_legitimize_pic_address (op1, mode,
                                                     temp == op1 ? 0 : temp);
            }
          else if (MACHOPIC_INDIRECT)
            op1 = machopic_indirect_data_reference (op1, 0);
          if (op0 == op1)
            return;
#endif
        }
      else
        {
          if (GET_CODE (op0) == MEM)
            op1 = force_reg (Pmode, op1);
          else
            op1 = legitimize_address (op1, op1, Pmode);
        }
    }
  else
    {
      if (GET_CODE (op0) == MEM
          && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
              || !push_operand (op0, mode))
          && GET_CODE (op1) == MEM)
        op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
          && ! general_no_elim_operand (op1, mode))
        op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64-bit compilation into a register
         to get them CSEd.  */
      if (TARGET_64BIT && mode == DImode
          && immediate_operand (op1, mode)
          && !x86_64_zext_immediate_operand (op1, VOIDmode)
          && !register_operand (op0, mode)
          && optimize && !reload_completed && !reload_in_progress)
        op1 = copy_to_mode_reg (mode, op1);

      if (FLOAT_MODE_P (mode))
        {
          /* If we are loading a floating point constant to a register,
             force the value to memory now, since we'll get better code
             out of the back end.  */

          if (strict)
            ;
          else if (GET_CODE (op1) == CONST_DOUBLE)
            {
              op1 = validize_mem (force_const_mem (mode, op1));
              if (!register_operand (op0, mode))
                {
                  rtx temp = gen_reg_rtx (mode);
                  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
                  emit_move_insn (op0, temp);
                  return;
                }
            }
        }
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}

void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0], op1 = operands[1];

  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register; once we have that information we may be able
     to handle some of them more efficiently.  */
  if ((reload_in_progress | reload_completed) == 0
      && register_operand (op0, mode)
      && CONSTANT_P (op1)
      && standard_sse_constant_p (op1) <= 0)
    op1 = validize_mem (force_const_mem (mode, op1));

  /* Make operand1 a register if it isn't already.  */
  if (!no_new_pseudos
      && !register_operand (op0, mode)
      && !register_operand (op1, mode))
    {
      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}

/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
   straight to ix86_expand_vector_move.  */

void
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1, m;

  op0 = operands[0];
  op1 = operands[1];

  if (MEM_P (op1))
    {
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_size)
        {
          op0 = gen_lowpart (V4SFmode, op0);
          op1 = gen_lowpart (V4SFmode, op1);
          emit_insn (gen_sse_movups (op0, op1));
          return;
        }

      /* ??? If we have typed data, then it would appear that using
         movdqu is the only way to get unaligned data loaded with
         integer type.  */
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        {
          op0 = gen_lowpart (V16QImode, op0);
          op1 = gen_lowpart (V16QImode, op1);
          emit_insn (gen_sse2_movdqu (op0, op1));
          return;
        }

      if (TARGET_SSE2 && mode == V2DFmode)
        {
          rtx zero;

          /* When SSE registers are split into halves, we can avoid
             writing to the top half twice.  */
          if (TARGET_SSE_SPLIT_REGS)
            {
              emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
              zero = op0;
            }
          else
            {
              /* ??? Not sure about the best option for the Intel chips.
                 The following would seem to satisfy; the register is
                 entirely cleared, breaking the dependency chain.  We
                 then store to the upper half, with a dependency depth
                 of one.  A rumor has it that Intel recommends two movsd
                 followed by an unpacklpd, but this is unconfirmed.  And
                 given that the dependency depth of the unpacklpd would
                 still be one, I'm not sure why this would be better.  */
              zero = CONST0_RTX (V2DFmode);
            }

          m = adjust_address (op1, DFmode, 0);
          emit_insn (gen_sse2_loadlpd (op0, zero, m));
          m = adjust_address (op1, DFmode, 8);
          emit_insn (gen_sse2_loadhpd (op0, op0, m));
        }
      else
        {
          if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
            emit_move_insn (op0, CONST0_RTX (mode));
          else
            emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));

          if (mode != V4SFmode)
            op0 = gen_lowpart (V4SFmode, op0);
          m = adjust_address (op1, V2SFmode, 0);
          emit_insn (gen_sse_loadlps (op0, op0, m));
          m = adjust_address (op1, V2SFmode, 8);
          emit_insn (gen_sse_loadhps (op0, op0, m));
        }
    }
  else if (MEM_P (op0))
    {
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_size)
        {
          op0 = gen_lowpart (V4SFmode, op0);
          op1 = gen_lowpart (V4SFmode, op1);
          emit_insn (gen_sse_movups (op0, op1));
          return;
        }

      /* ??? Similar to the above, only less clear because of
         "typeless stores".  */
      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
          && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        {
          op0 = gen_lowpart (V16QImode, op0);
          op1 = gen_lowpart (V16QImode, op1);
          emit_insn (gen_sse2_movdqu (op0, op1));
          return;
        }

      if (TARGET_SSE2 && mode == V2DFmode)
        {
          m = adjust_address (op0, DFmode, 0);
          emit_insn (gen_sse2_storelpd (m, op1));
          m = adjust_address (op0, DFmode, 8);
          emit_insn (gen_sse2_storehpd (m, op1));
        }
      else
        {
          if (mode != V4SFmode)
            op1 = gen_lowpart (V4SFmode, op1);
          m = adjust_address (op0, V2SFmode, 0);
          emit_insn (gen_sse_storelps (m, op1));
          m = adjust_address (op0, V2SFmode, 8);
          emit_insn (gen_sse_storehps (m, op1));
        }
    }
  else
    gcc_unreachable ();
}

/* Expand a push in MODE.  This is some mode for which we do not support
   proper push instructions, at least from the registers that we expect
   the value to live in.  */

void
ix86_expand_push (enum machine_mode mode, rtx x)
{
  rtx tmp;

  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
                             GEN_INT (-GET_MODE_SIZE (mode)),
                             stack_pointer_rtx, 1, OPTAB_DIRECT);
  if (tmp != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, tmp);

  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
  emit_move_insn (tmp, x);
}
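
/* The effect, sketched in assembly for a 16-byte vector mode in
   32-bit code, is a two-insn substitute for push:

        sub     $16, %esp       ; GET_MODE_SIZE (mode) == 16
        movups  %xmm0, (%esp)

   since there is no push instruction that takes an SSE register.  */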

/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
   destination to use for the operation.  If different from the true
   destination in operands[0], a copy operation will be required.  */

rtx
ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
                            rtx operands[])
{
  int matching_memory;
  rtx src1, src2, dst;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators.  */
  if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
      && (rtx_equal_p (dst, src2)
          || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
        matching_memory = 1;
      else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
               && rtx_equal_p (dst, src2))
        matching_memory = 2;
      else
        dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      if (matching_memory != 2)
        src2 = force_reg (mode, src2);
      else
        src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutative, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    src1 = force_reg (mode, src1);

  operands[1] = src1;
  operands[2] = src2;
  return dst;
}

/* Similarly, but assume that the destination has already been
   set up properly.  */

void
ix86_fixup_binary_operands_no_copy (enum rtx_code code,
                                    enum machine_mode mode, rtx operands[])
{
  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
  gcc_assert (dst == operands[0]);
}

/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine than just general_operand, which would allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
                             rtx operands[])
{
  rtx src1, src2, dst, op, clob;

  dst = ix86_fixup_binary_operands (code, mode, operands);
  src1 = operands[1];
  src2 = operands[2];

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  We can only do this with PLUS.  */
      gcc_assert (code == PLUS);
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

int
ix86_binary_operator_ok (enum rtx_code code,
                         enum machine_mode mode ATTRIBUTE_UNUSED,
                         rtx operands[3])
{
  /* Both source operands cannot be in memory.  */
  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
    return 0;
  /* If the operation is not commutative, source 1 cannot be a constant.  */
  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    return 0;
  /* If the destination is memory, we must have a matching source operand.  */
  if (GET_CODE (operands[0]) == MEM
      && ! (rtx_equal_p (operands[0], operands[1])
            || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
                && rtx_equal_p (operands[0], operands[2]))))
    return 0;
  /* If the operation is not commutative and source 1 is memory, we must
     have a matching destination.  */
  if (GET_CODE (operands[1]) == MEM
      && GET_RTX_CLASS (code) != RTX_COMM_ARITH
      && ! rtx_equal_p (operands[0], operands[1]))
    return 0;
  return 1;
}
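
/* A concrete reading of the rules above, on hypothetical operands:
   "a = b + c" with all three in memory forces C into a register first,
   so at most one memory reference survives; the non-commutative
   "a = 1 - b" must load the constant 1 into a register, since source 1
   of a subtraction can be neither a constant nor non-matching memory.  */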

/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine than just general_operand, which would allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
                            rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src))
        matching_memory = 1;
      else
        dst = gen_reg_rtx (mode);
    }

  /* When the source operand is memory, the destination must match.  */
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  */
      gcc_assert (code == NOT);
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

int
ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
                        enum machine_mode mode ATTRIBUTE_UNUSED,
                        rtx operands[2] ATTRIBUTE_UNUSED)
{
  /* If one of the operands is memory, source and destination must match.  */
  if ((GET_CODE (operands[0]) == MEM
       || GET_CODE (operands[1]) == MEM)
      && ! rtx_equal_p (operands[0], operands[1]))
    return FALSE;
  return TRUE;
}

/* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
   Create a mask for the sign bit in MODE for an SSE register.  If VECT is
   true, then replicate the mask for all elements of the vector register.
   If INVERT is true, then create a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
  enum machine_mode vec_mode;
  HOST_WIDE_INT hi, lo;
  int shift = 63;
  rtvec v;
  rtx mask;

  /* Find the sign bit, sign extended to 2*HWI.  */
  if (mode == SFmode)
    lo = 0x80000000, hi = lo < 0;
  else if (HOST_BITS_PER_WIDE_INT >= 64)
    lo = (HOST_WIDE_INT)1 << shift, hi = -1;
  else
    lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);

  if (invert)
    lo = ~lo, hi = ~hi;

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
  mask = gen_lowpart (mode, mask);

  if (mode == SFmode)
    {
      if (vect)
        v = gen_rtvec (4, mask, mask, mask, mask);
      else
        v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
                       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
      vec_mode = V4SFmode;
    }
  else
    {
      if (vect)
        v = gen_rtvec (2, mask, mask);
      else
        v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
      vec_mode = V2DFmode;
    }

  return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
}
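
#if 0
/* A standalone sketch, assuming IEEE single/double layout with the
   sign in the most significant bit, of the scalar mask values the
   function above splats into a vector register:  */
static unsigned int
sf_signbit_mask (int invert)
{
  unsigned int mask = 1u << 31;         /* 0x80000000 */
  return invert ? ~mask : mask;         /* ~mask == 0x7fffffff */
}

static unsigned long long
df_signbit_mask (int invert)
{
  unsigned long long mask = 1ull << 63;
  return invert ? ~mask : mask;
}
#endif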

/* Generate code for floating point ABS or NEG.  */

void
ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
                                rtx operands[])
{
  rtx mask, set, use, clob, dst, src;
  bool matching_memory;
  bool use_sse = false;
  bool vector_mode = VECTOR_MODE_P (mode);
  enum machine_mode elt_mode = mode;

  if (vector_mode)
    {
      elt_mode = GET_MODE_INNER (mode);
      use_sse = true;
    }
  else if (TARGET_SSE_MATH)
    use_sse = SSE_FLOAT_MODE_P (mode);

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now.  */
  if (use_sse)
    mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
  else
    mask = NULL_RTX;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we don't have matching source
     operands or we're using the x87, do things in registers.  */
  matching_memory = false;
  if (MEM_P (dst))
    {
      if (use_sse && rtx_equal_p (dst, src))
        matching_memory = true;
      else
        dst = gen_reg_rtx (mode);
    }
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  if (vector_mode)
    {
      set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
      set = gen_rtx_SET (VOIDmode, dst, set);
      emit_insn (set);
    }
  else
    {
      set = gen_rtx_fmt_e (code, mode, src);
      set = gen_rtx_SET (VOIDmode, dst, set);
      if (mask)
        {
          use = gen_rtx_USE (VOIDmode, mask);
          clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
          emit_insn (gen_rtx_PARALLEL (VOIDmode,
                                       gen_rtvec (3, set, use, clob)));
        }
      else
        emit_insn (set);
    }

  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}

/* Expand a copysign operation.  Special case operand 0 being a constant.  */

void
ix86_expand_copysign (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];

  mode = GET_MODE (dest);
  vmode = mode == SFmode ? V4SFmode : V2DFmode;

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtvec v;

      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
        op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (op0 == CONST0_RTX (mode))
        op0 = CONST0_RTX (vmode);
      else
        {
          if (mode == SFmode)
            v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
                           CONST0_RTX (SFmode), CONST0_RTX (SFmode));
          else
            v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
          op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
        }

      mask = ix86_build_signbit_mask (mode, 0, 0);

      if (mode == SFmode)
        emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
      else
        emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
    }
  else
    {
      nmask = ix86_build_signbit_mask (mode, 0, 1);
      mask = ix86_build_signbit_mask (mode, 0, 0);

      if (mode == SFmode)
        emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
      else
        emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
    }
}

/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

void
ix86_split_copysign_const (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, x;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  dest = simplify_gen_subreg (vmode, dest, mode, 0);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}
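
#if 0
/* A scalar model, for illustration, of the AND/IOR pair split out
   above: keep only the sign bit of the value already in DEST (which
   the pattern arranges to hold op1) and OR in the magnitude bits of
   the |op0| constant.  On doubles:  */
static unsigned long long
copysign_bits_df (unsigned long long abs_op0, unsigned long long op1)
{
  const unsigned long long sign = 1ull << 63;
  return (op1 & sign) | (abs_op0 & ~sign);
}
#endif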

/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  */

void
ix86_split_copysign_var (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
         we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);
      return;
    }

  if (REG_P (mask) && REGNO (dest) == REGNO (mask))     /* alternative 0 */
    {
      gcc_assert (REGNO (op1) == REGNO (scratch));

      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      dest = mask;
      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      if (REGNO (op1) == REGNO (scratch))               /* alternative 1,3 */
        {
          x = gen_rtx_AND (vmode, scratch, mask);
        }
      else                                              /* alternative 2,4 */
        {
          gcc_assert (REGNO (mask) == REGNO (scratch));
          op1 = simplify_gen_subreg (vmode, op1, mode, 0);
          x = gen_rtx_AND (vmode, scratch, op1);
        }
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      if (REGNO (op0) == REGNO (dest))                  /* alternative 1,2 */
        {
          dest = simplify_gen_subreg (vmode, op0, mode, 0);
          x = gen_rtx_AND (vmode, dest, nmask);
        }
      else                                              /* alternative 3,4 */
        {
          gcc_assert (REGNO (nmask) == REGNO (dest));
          dest = nmask;
          op0 = simplify_gen_subreg (vmode, op0, mode, 0);
          x = gen_rtx_AND (vmode, dest, op0);
        }
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }

  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}

/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

int
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
          && (req_mode != CCmode
              || XEXP (SET_SRC (set), 1) != const0_rtx))
        return 0;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
        return 0;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
        return 0;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
        return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      gcc_unreachable ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}
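
/* The fallthrough chain above orders the modes by how constrained they
   are: a compare computed in CCZmode (only ZF meaningful) satisfies any
   request, CCGOCmode anything but a CCZmode request, CCGCmode only
   CCGCmode and CCmode requests, and a full CCmode compare only a CCmode
   request; CCNOmode is special-cased since it also matches a CCmode
   request when comparing against zero.  */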

/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}

/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms of trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}

enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    return ix86_fp_compare_mode (code);
  switch (code)
    {
      /* Only the zero flag is needed.  */
    case EQ:                    /* ZF=0 */
    case NE:                    /* ZF!=0 */
      return CCZmode;
      /* Codes needing the carry flag.  */
    case GEU:                   /* CF=0 */
    case GTU:                   /* CF=0 & ZF=0 */
    case LTU:                   /* CF=1 */
    case LEU:                   /* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with the sign flag when
         comparing against zero.  */
    case GE:                    /* SF=OF   or   SF=0 */
    case LT:                    /* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
        return CCGOCmode;
      else
        /* For other cases the carry flag is not required.  */
        return CCGCmode;
      /* Codes doable only with the sign flag when comparing
         against zero, but we lack a jump instruction for it,
         so we need to use relational tests against overflow,
         which thus needs to be zero.  */
    case GT:                    /* ZF=0 & SF=OF */
    case LE:                    /* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
        return CCNOmode;
      else
        return CCGCmode;
      /* The strcmp pattern does a (use flags), and combine may ask us
         for the proper mode.  */
    case USE:
      return CCmode;
    default:
      gcc_unreachable ();
    }
}

/* Return the fixed registers used for condition codes.  */

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCZmode:
      switch (m2)
        {
        default:
          return VOIDmode;

        case CCmode:
        case CCGCmode:
        case CCGOCmode:
        case CCNOmode:
        case CCZmode:
          return CCmode;
        }

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
         checked above.  */
      return VOIDmode;
    }
}

/* Return true if we should use an FCOMI instruction for this fp comparison.  */

int
ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
{
  enum rtx_code swapped_code = swap_condition (code);
  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
          || (ix86_fp_comparison_cost (swapped_code)
              == ix86_fp_comparison_fcomi_cost (swapped_code)));
}

/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the fcomi compare instructions.  The XFmode
     compare instructions require registers except when comparing
     against zero or when converting operand 1 from fixed point to
     floating point.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
          || (op_mode == XFmode
              && ! (standard_80387_constant_p (op0) == 1
                    || standard_80387_constant_p (op1) == 1)
              && GET_CODE (op1) != FLOAT)
          || ix86_use_fcomi_compare (code)))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
         things around if they appear profitable, otherwise force op0
         into a register.  */

      if (standard_80387_constant_p (op0) == 0
          || (GET_CODE (op0) == MEM
              && ! (standard_80387_constant_p (op1) == 0
                    || GET_CODE (op1) == MEM)))
        {
          rtx tmp;
          tmp = op0, op0 = op1, op1 = tmp;
          code = swap_condition (code);
        }

      if (GET_CODE (op0) != REG)
        op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
        {
          int tmp = standard_80387_constant_p (op1);
          if (tmp == 0)
            op1 = validize_mem (force_const_mem (op_mode, op1));
          else if (tmp == 1)
            {
              if (TARGET_CMOVE)
                op1 = force_reg (op_mode, op1);
            }
          else
            op1 = force_reg (op_mode, op1);
        }
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (GET_CODE (op1) == REG || !no_new_pseudos))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (GET_CODE (op0) != REG)
        op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}

/* Convert the comparison codes we use to represent FP comparisons to
   integer codes that will result in a proper branch.  Return UNKNOWN
   if no such code is available.  */

enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
    case UNEQ:
      return EQ;
    case UNLT:
      return LTU;
    case UNLE:
      return LEU;
    case LTGT:
      return NE;
    default:
      return UNKNOWN;
    }
}

/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is the comparison code for a branch that
   will branch around FIRST_CODE and SECOND_CODE.  If one of the
   branches is not required, its value is set to UNKNOWN.
   We never require more than two branches.  */

void
ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
                          enum rtx_code *first_code,
                          enum rtx_code *second_code)
{
  *first_code = code;
  *bypass_code = UNKNOWN;
  *second_code = UNKNOWN;

  /* The fcomi comparison sets flags as follows:

     cmp    ZF PF CF
     >      0  0  0
     <      0  0  1
     =      1  0  0
     un     1  1  1 */

  switch (code)
    {
    case GT:                    /* GTU - CF=0 & ZF=0 */
    case GE:                    /* GEU - CF=0 */
    case ORDERED:               /* PF=0 */
    case UNORDERED:             /* PF=1 */
    case UNEQ:                  /* EQ - ZF=1 */
    case UNLT:                  /* LTU - CF=1 */
    case UNLE:                  /* LEU - CF=1 | ZF=1 */
    case LTGT:                  /* EQ - ZF=0 */
      break;
    case LT:                    /* LTU - CF=1 - fails on unordered */
      *first_code = UNLT;
      *bypass_code = UNORDERED;
      break;
    case LE:                    /* LEU - CF=1 | ZF=1 - fails on unordered */
      *first_code = UNLE;
      *bypass_code = UNORDERED;
      break;
    case EQ:                    /* EQ - ZF=1 - fails on unordered */
      *first_code = UNEQ;
      *bypass_code = UNORDERED;
      break;
    case NE:                    /* NE - ZF=0 - fails on unordered */
      *first_code = LTGT;
      *second_code = UNORDERED;
      break;
    case UNGE:                  /* GEU - CF=0 - fails on unordered */
      *first_code = GE;
      *second_code = UNORDERED;
      break;
    case UNGT:                  /* GTU - CF=0 & ZF=0 - fails on unordered */
      *first_code = GT;
      *second_code = UNORDERED;
      break;
    default:
      gcc_unreachable ();
    }
  if (!TARGET_IEEE_FP)
    {
      *second_code = UNKNOWN;
      *bypass_code = UNKNOWN;
    }
}
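
/* Example of the splitting above under TARGET_IEEE_FP: "a < b" becomes
   first_code = UNLT with bypass_code = UNORDERED, i.e. branch around
   the test when the operands compare unordered (PF set) and otherwise
   branch on CF; "a != b" instead needs two taken branches, LTGT plus
   UNORDERED, because NE must also hold for NaN operands.  */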
10061
 
10062
/* Return cost of comparison done fcom + arithmetics operations on AX.
10063
   All following functions do use number of instructions as a cost metrics.
10064
   In future this should be tweaked to compute bytes for optimize_size and
10065
   take into account performance of various instructions on various CPUs.  */
10066
static int
10067
ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10068
{
10069
  if (!TARGET_IEEE_FP)
10070
    return 4;
10071
  /* The cost of code output by ix86_expand_fp_compare.  */
10072
  switch (code)
10073
    {
10074
    case UNLE:
10075
    case UNLT:
10076
    case LTGT:
10077
    case GT:
10078
    case GE:
10079
    case UNORDERED:
10080
    case ORDERED:
10081
    case UNEQ:
10082
      return 4;
10083
      break;
10084
    case LT:
10085
    case NE:
10086
    case EQ:
10087
    case UNGE:
10088
      return 5;
10089
      break;
10090
    case LE:
10091
    case UNGT:
10092
      return 6;
10093
      break;
10094
    default:
10095
      gcc_unreachable ();
10096
    }
10097
}
10098
 
10099
/* Return cost of comparison done using fcomi operation.
10100
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
10101
static int
10102
ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10103
{
10104
  enum rtx_code bypass_code, first_code, second_code;
10105
  /* Return arbitrarily high cost when instruction is not supported - this
10106
     prevents gcc from using it.  */
10107
  if (!TARGET_CMOVE)
10108
    return 1024;
10109
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10110
  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10111
}
10112
 
10113
/* Return cost of comparison done using sahf operation.
10114
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
10115
static int
10116
ix86_fp_comparison_sahf_cost (enum rtx_code code)
10117
{
10118
  enum rtx_code bypass_code, first_code, second_code;
10119
  /* Return arbitrarily high cost when instruction is not preferred - this
10120
     avoids gcc from using it.  */
10121
  if (!TARGET_USE_SAHF && !optimize_size)
10122
    return 1024;
10123
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10124
  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10125
}
10126
 
10127
/* Compute cost of the comparison done using any method.
10128
   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
10129
static int
10130
ix86_fp_comparison_cost (enum rtx_code code)
10131
{
10132
  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10133
  int min;
10134
 
10135
  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10136
  sahf_cost = ix86_fp_comparison_sahf_cost (code);
10137
 
10138
  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10139
  if (min > sahf_cost)
10140
    min = sahf_cost;
10141
  if (min > fcomi_cost)
10142
    min = fcomi_cost;
10143
  return min;
10144
}
10145
 
10146
/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
                        rtx *second_test, rtx *bypass_test)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;
  int cost = ix86_fp_comparison_cost (code);
  enum rtx_code bypass_code, first_code, second_code;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

  /* Do fcomi/sahf based test when profitable.  */
  if ((bypass_code == UNKNOWN || bypass_test)
      && (second_code == UNKNOWN || second_test)
      && ix86_fp_comparison_arithmetics_cost (code) > cost)
    {
      if (TARGET_CMOVE)
        {
          tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
          tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
                             tmp);
          emit_insn (tmp);
        }
      else
        {
          tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
          tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
          if (!scratch)
            scratch = gen_reg_rtx (HImode);
          emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
          emit_insn (gen_x86_sahf_1 (scratch));
        }

      /* The FP codes work out to act like unsigned.  */
      intcmp_mode = fpcmp_mode;
      code = first_code;
      if (bypass_code != UNKNOWN)
        *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
                                       gen_rtx_REG (intcmp_mode, FLAGS_REG),
                                       const0_rtx);
      if (second_code != UNKNOWN)
        *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
                                       gen_rtx_REG (intcmp_mode, FLAGS_REG),
                                       const0_rtx);
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
        scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaNs, which
         doesn't happen to work out to anything nice combination-wise.
         So do some bit twiddling on the value we've got in AH to come
         up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
      switch (code)
        {
        case GT:
        case UNGT:
          if (code == GT || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
              intcmp_mode = CCmode;
              code = GEU;
            }
          break;
        case LT:
        case UNLT:
          if (code == LT && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
              code = NE;
            }
          break;
        case GE:
        case UNGE:
          if (code == GE || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
                                             GEN_INT (0x01)));
              code = NE;
            }
          break;
        case LE:
        case UNLE:
          if (code == LE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = LTU;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = NE;
            }
          break;
        case EQ:
        case UNEQ:
          if (code == EQ && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = NE;
            }
          break;
        case NE:
        case LTGT:
          if (code == NE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
                                             GEN_INT (0x40)));
              code = NE;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = EQ;
            }
          break;

        case UNORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = NE;
          break;
        case ORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = EQ;
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
                         gen_rtx_REG (intcmp_mode, FLAGS_REG),
                         const0_rtx);
}

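      /* After FNSTSW the status word is in AX, and the tests below look
         at AH, where C0, C2 and C3 land in bits 0, 2 and 6.  Hence the
         masks used here: 0x01 = C0 (carry), 0x04 = C2 (unordered),
         0x40 = C3 (zero), 0x45 = C0|C2|C3.  */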
rtx
ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
{
  rtx op0, op1, ret;
  op0 = ix86_compare_op0;
  op1 = ix86_compare_op1;

  if (second_test)
    *second_test = NULL_RTX;
  if (bypass_test)
    *bypass_test = NULL_RTX;

  if (ix86_compare_emitted)
    {
      ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
      ix86_compare_emitted = NULL_RTX;
    }
  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
                                  second_test, bypass_test);
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}

/* Return true if CODE will result in a nontrivial jump sequence.  */
bool
ix86_fp_jump_nontrivial_p (enum rtx_code code)
{
  enum rtx_code bypass_code, first_code, second_code;
  if (!TARGET_CMOVE)
    return true;
  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
  return bypass_code != UNKNOWN || second_code != UNKNOWN;
}

void
ix86_expand_branch (enum rtx_code code, rtx label)
{
  rtx tmp;

  /* If we have emitted a compare insn, go straight to simple.
     ix86_expand_compare won't emit anything if ix86_compare_emitted
     is non-NULL.  */
  if (ix86_compare_emitted)
    goto simple;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
      simple:
      tmp = ix86_expand_compare (code, NULL, NULL);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
      {
        rtvec vec;
        int use_fcomi;
        enum rtx_code bypass_code, first_code, second_code;

        code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
                                             &ix86_compare_op1);

        ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);

        /* Check whether we will use the natural sequence with one jump.  If
           so, we can expand the jump early.  Otherwise delay expansion by
           creating a compound insn so as not to confuse the optimizers.  */
        if (bypass_code == UNKNOWN && second_code == UNKNOWN
            && TARGET_CMOVE)
          {
            ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx, NULL_RTX, NULL_RTX);
          }
        else
          {
            tmp = gen_rtx_fmt_ee (code, VOIDmode,
                                  ix86_compare_op0, ix86_compare_op1);
            tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                        gen_rtx_LABEL_REF (VOIDmode, label),
                                        pc_rtx);
            tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

            use_fcomi = ix86_use_fcomi_compare (code);
            vec = rtvec_alloc (3 + !use_fcomi);
            RTVEC_ELT (vec, 0) = tmp;
            RTVEC_ELT (vec, 1)
              = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
            RTVEC_ELT (vec, 2)
              = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
            if (! use_fcomi)
              RTVEC_ELT (vec, 3)
                = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

            emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
          }
        return;
      }

    case DImode:
      if (TARGET_64BIT)
        goto simple;
    case TImode:
      /* Expand DImode/TImode branch into multiple compare+branch.  */
      {
        rtx lo[2], hi[2], label2;
        enum rtx_code code1, code2, code3;
        enum machine_mode submode;

        if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
          {
            tmp = ix86_compare_op0;
            ix86_compare_op0 = ix86_compare_op1;
            ix86_compare_op1 = tmp;
            code = swap_condition (code);
          }
        if (GET_MODE (ix86_compare_op0) == DImode)
          {
            split_di (&ix86_compare_op0, 1, lo+0, hi+0);
            split_di (&ix86_compare_op1, 1, lo+1, hi+1);
            submode = SImode;
          }
        else
          {
            split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
            split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
            submode = DImode;
          }

        /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
           avoid two branches.  This costs one extra insn, so disable when
           optimizing for size.  */

        if ((code == EQ || code == NE)
            && (!optimize_size
                || hi[1] == const0_rtx || lo[1] == const0_rtx))
          {
            rtx xor0, xor1;

            xor1 = hi[0];
            if (hi[1] != const0_rtx)
              xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            xor0 = lo[0];
            if (lo[1] != const0_rtx)
              xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            tmp = expand_binop (submode, ior_optab, xor1, xor0,
                                NULL_RTX, 0, OPTAB_WIDEN);

            ix86_compare_op0 = tmp;
            ix86_compare_op1 = const0_rtx;
            ix86_expand_branch (code, label);
            return;
          }

        /* Otherwise, if we are doing less-than or greater-or-equal-than,
           op1 is a constant and the low word is zero, then we can just
           examine the high word.  */

        if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
          switch (code)
            {
            case LT: case LTU: case GE: case GEU:
              ix86_compare_op0 = hi[0];
              ix86_compare_op1 = hi[1];
              ix86_expand_branch (code, label);
              return;
            default:
              break;
            }

        /* Otherwise, we need two or three jumps.  */

        label2 = gen_label_rtx ();

        code1 = code;
        code2 = swap_condition (code);
        code3 = unsigned_condition (code);

        switch (code)
          {
          case LT: case GT: case LTU: case GTU:
            break;

          case LE:   code1 = LT;  code2 = GT;  break;
          case GE:   code1 = GT;  code2 = LT;  break;
          case LEU:  code1 = LTU; code2 = GTU; break;
          case GEU:  code1 = GTU; code2 = LTU; break;

          case EQ:   code1 = UNKNOWN; code2 = NE;  break;
          case NE:   code2 = UNKNOWN; break;

          default:
            gcc_unreachable ();
          }

        /*
         * a < b =>
         *    if (hi(a) < hi(b)) goto true;
         *    if (hi(a) > hi(b)) goto false;
         *    if (lo(a) < lo(b)) goto true;
         *  false:
         */

        ix86_compare_op0 = hi[0];
        ix86_compare_op1 = hi[1];

        if (code1 != UNKNOWN)
          ix86_expand_branch (code1, label);
        if (code2 != UNKNOWN)
          ix86_expand_branch (code2, label2);

        ix86_compare_op0 = lo[0];
        ix86_compare_op1 = lo[1];
        ix86_expand_branch (code3, label);

        if (code2 != UNKNOWN)
          emit_label (label2);
        return;
      }

    default:
      gcc_unreachable ();
    }
}

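        /* code1 branches to LABEL on the high-word comparison, code2
           branches to LABEL2 when the high words already decide the
           opposite way, and code3 (the unsigned variant) decides on
           the low words.  */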
/* Split a branch based on a floating point condition.  */
void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
                      rtx target1, rtx target2, rtx tmp, rtx pushed)
{
  rtx second, bypass;
  rtx label = NULL_RTX;
  rtx condition;
  int bypass_probability = -1, second_probability = -1, probability = -1;
  rtx i;

  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
                                      tmp, &second, &bypass);

  /* Remove pushed operand from stack.  */
  if (pushed)
    ix86_free_from_memory (GET_MODE (pushed));

  if (split_branch_probability >= 0)
    {
      /* Distribute the probabilities across the jumps.
         Assume that the BYPASS and SECOND tests are always
         for UNORDERED.  */
      probability = split_branch_probability;

      /* A value of 1 is low enough that the main probability need not
         be updated.  Later we may run some experiments and see
         whether unordered values are more frequent in practice.  */
      if (bypass)
        bypass_probability = 1;
      if (second)
        second_probability = 1;
    }
  if (bypass != NULL_RTX)
    {
      label = gen_label_rtx ();
      i = emit_jump_insn (gen_rtx_SET
                          (VOIDmode, pc_rtx,
                           gen_rtx_IF_THEN_ELSE (VOIDmode,
                                                 bypass,
                                                 gen_rtx_LABEL_REF (VOIDmode,
                                                                    label),
                                                 pc_rtx)));
      if (bypass_probability >= 0)
        REG_NOTES (i)
          = gen_rtx_EXPR_LIST (REG_BR_PROB,
                               GEN_INT (bypass_probability),
                               REG_NOTES (i));
    }
  i = emit_jump_insn (gen_rtx_SET
                      (VOIDmode, pc_rtx,
                       gen_rtx_IF_THEN_ELSE (VOIDmode,
                                             condition, target1, target2)));
  if (probability >= 0)
    REG_NOTES (i)
      = gen_rtx_EXPR_LIST (REG_BR_PROB,
                           GEN_INT (probability),
                           REG_NOTES (i));
  if (second != NULL_RTX)
    {
      i = emit_jump_insn (gen_rtx_SET
                          (VOIDmode, pc_rtx,
                           gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
                                                 target2)));
      if (second_probability >= 0)
        REG_NOTES (i)
          = gen_rtx_EXPR_LIST (REG_BR_PROB,
                               GEN_INT (second_probability),
                               REG_NOTES (i));
    }
  if (label != NULL_RTX)
    emit_label (label);
}

int
ix86_expand_setcc (enum rtx_code code, rtx dest)
{
  rtx ret, tmp, tmpreg, equiv;
  rtx second_test, bypass_test;

  if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
    return 0; /* FAIL */

  gcc_assert (GET_MODE (dest) == QImode);

  ret = ix86_expand_compare (code, &second_test, &bypass_test);
  PUT_MODE (ret, QImode);

  tmp = dest;
  tmpreg = dest;

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
  if (bypass_test || second_test)
    {
      rtx test = second_test;
      int bypass = 0;
      rtx tmp2 = gen_reg_rtx (QImode);
      if (bypass_test)
        {
          gcc_assert (!second_test);
          test = bypass_test;
          bypass = 1;
          PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
        }
      PUT_MODE (test, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));

      if (bypass)
        emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
      else
        emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
    }

  /* Attach a REG_EQUAL note describing the comparison result.  */
  if (ix86_compare_op0 && ix86_compare_op1)
    {
      equiv = simplify_gen_relational (code, QImode,
                                       GET_MODE (ix86_compare_op0),
                                       ix86_compare_op0, ix86_compare_op1);
      set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
    }

  return 1; /* DONE */
}

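      /* The reversed bypass test must also hold, so AND it in below;
         a second test is an alternative way to succeed, so IOR it in.  */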
/* Expand a comparison setting or clearing the carry flag.  Return true
   when successful and set *POP to the comparison operation.  */
static bool
ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
{
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle DImode compares, which go through a special path.
     FP compares are handled below only when they can be expressed as
     carry flag tests.  */
  if (mode == (TARGET_64BIT ? TImode : DImode))
    return false;
  if (FLOAT_MODE_P (mode))
    {
      rtx second_test = NULL, bypass_test = NULL;
      rtx compare_op, compare_seq;

      /* Shortcut:  the following common codes never translate into carry flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
          || code == ORDERED || code == UNORDERED)
        return false;

      /* These comparisons require the zero flag; swap operands so they don't.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
          && !TARGET_IEEE_FP)
        {
          rtx tmp = op0;
          op0 = op1;
          op1 = tmp;
          code = swap_condition (code);
        }

      /* Try to expand the comparison and verify that we end up with a carry
         flag based comparison.  This fails to be true only when we decide
         to expand the comparison using arithmetic, which is not a common
         scenario.  */
      start_sequence ();
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
                                           &second_test, &bypass_test);
      compare_seq = get_insns ();
      end_sequence ();

      if (second_test || bypass_test)
        return false;
      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
          || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
        code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
      else
        code = GET_CODE (compare_op);
      if (code != LTU && code != GEU)
        return false;
      emit_insn (compare_seq);
      *pop = compare_op;
      return true;
    }
  if (!INTEGRAL_MODE_P (mode))
    return false;
  switch (code)
    {
    case LTU:
    case GEU:
      break;

    /* Convert a==0 into (unsigned)a<1.  */
    case EQ:
    case NE:
      if (op1 != const0_rtx)
        return false;
      op1 = const1_rtx;
      code = (code == EQ ? LTU : GEU);
      break;

    /* Convert a>b into b<a or a>=b+1.  */
    case GTU:
    case LEU:
      if (GET_CODE (op1) == CONST_INT)
        {
          op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
          /* Bail out on overflow.  We still can swap operands but that
             would force loading of the constant into a register.  */
          if (op1 == const0_rtx
              || !x86_64_immediate_operand (op1, GET_MODE (op1)))
            return false;
          code = (code == GTU ? GEU : LTU);
        }
      else
        {
          rtx tmp = op1;
          op1 = op0;
          op0 = tmp;
          code = (code == GTU ? LTU : GEU);
        }
      break;

    /* Convert a>=0 into (unsigned)a<0x80000000.  */
    case LT:
    case GE:
      if (mode == DImode || op1 != const0_rtx)
        return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LT ? GEU : LTU);
      break;
    case LE:
    case GT:
      if (mode == DImode || op1 != constm1_rtx)
        return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LE ? GEU : LTU);
      break;

    default:
      return false;
    }
  /* Swapping operands may cause a constant to appear as the first operand.  */
  if (!nonimmediate_operand (op0, VOIDmode))
    {
      if (no_new_pseudos)
        return false;
      op0 = force_reg (mode, op0);
    }
  ix86_compare_op0 = op0;
  ix86_compare_op1 = op1;
  *pop = ix86_expand_compare (code, NULL, NULL);
  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
  return true;
}

int
ix86_expand_int_movcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  rtx second_test, bypass_test;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;

  start_sequence ();
  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
  compare_seq = get_insns ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
      || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
    sign_bit_compare_p = true;

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if ((mode != HImode || TARGET_FAST_PREFIX)
      && (mode != (TARGET_64BIT ? TImode : DImode))
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      diff = ct - cf;
      /* Sign bit compares are better done using shifts than sbb.  */
      if (sign_bit_compare_p
          || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
                                             ix86_compare_op1, &compare_op))
        {
          /* Detect overlap between destination and compare sources.  */
          rtx tmp = out;

          if (!sign_bit_compare_p)
            {
              bool fpcmp = false;

              compare_code = GET_CODE (compare_op);

              if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
                  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
                {
                  fpcmp = true;
                  compare_code = ix86_fp_compare_code_to_integer (compare_code);
                }

              /* To simplify the rest of the code, restrict to the GEU case.  */
              if (compare_code == LTU)
                {
                  HOST_WIDE_INT tmp = ct;
                  ct = cf;
                  cf = tmp;
                  compare_code = reverse_condition (compare_code);
                  code = reverse_condition (code);
                }
              else
                {
                  if (fpcmp)
                    PUT_CODE (compare_op,
                              reverse_condition_maybe_unordered
                                (GET_CODE (compare_op)));
                  else
                    PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
                }
              diff = ct - cf;

              if (reg_overlap_mentioned_p (out, ix86_compare_op0)
                  || reg_overlap_mentioned_p (out, ix86_compare_op1))
                tmp = gen_reg_rtx (mode);

              if (mode == DImode)
                emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
              else
                emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
            }
          else
            {
              if (code == GT || code == GE)
                code = reverse_condition (code);
              else
                {
                  HOST_WIDE_INT tmp = ct;
                  ct = cf;
                  cf = tmp;
                  diff = ct - cf;
                }
              tmp = emit_store_flag (tmp, code, ix86_compare_op0,
                                     ix86_compare_op1, VOIDmode, 0, -1);
            }

          if (diff == 1)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * [addl dest, ct]
               *
               * Size 5 - 8.
               */
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           tmp, GEN_INT (ct),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else if (cf == -1)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * orl $ct, dest
               *
               * Size 8.
               */
              tmp = expand_simple_binop (mode, IOR,
                                         tmp, GEN_INT (ct),
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else if (diff == -1 && ct)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * notl dest
               * [addl dest, cf]
               *
               * Size 8 - 11.
               */
              tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
              if (cf)
                tmp = expand_simple_binop (mode, PLUS,
                                           copy_rtx (tmp), GEN_INT (cf),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * [notl dest]
               * andl cf - ct, dest
               * [addl dest, ct]
               *
               * Size 8 - 11.
               */

              if (cf == 0)
                {
                  cf = ct;
                  ct = 0;
                  tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
                }

              tmp = expand_simple_binop (mode, AND,
                                         copy_rtx (tmp),
                                         gen_int_mode (cf - ct, mode),
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           copy_rtx (tmp), GEN_INT (ct),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }

          if (!rtx_equal_p (tmp, out))
            emit_move_insn (copy_rtx (out), copy_rtx (tmp));

          return 1; /* DONE */
        }

      if (diff < 0)
        {
          HOST_WIDE_INT tmp;
          tmp = ct, ct = cf, cf = tmp;
          diff = -diff;
          if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
            {
              /* We may be reversing an unordered compare to a normal compare;
                 that is not valid in general (we may convert a non-trapping
                 condition into a trapping one), but on i386 we currently
                 emit all comparisons unordered.  */
              compare_code = reverse_condition_maybe_unordered (compare_code);
              code = reverse_condition_maybe_unordered (code);
            }
          else
            {
              compare_code = reverse_condition (compare_code);
              code = reverse_condition (code);
            }
        }

      compare_code = UNKNOWN;
      if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
          && GET_CODE (ix86_compare_op1) == CONST_INT)
        {
          if (ix86_compare_op1 == const0_rtx
              && (code == LT || code == GE))
            compare_code = code;
          else if (ix86_compare_op1 == constm1_rtx)
            {
              if (code == LE)
                compare_code = LT;
              else if (code == GT)
                compare_code = GE;
            }
        }

      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
      if (compare_code != UNKNOWN
          && GET_MODE (ix86_compare_op0) == GET_MODE (out)
          && (cf == -1 || ct == -1))
        {
          /* If the lea code below could be used, only optimize
             when it results in a 2-insn sequence.  */

          if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
                 || diff == 3 || diff == 5 || diff == 9)
              || (compare_code == LT && ct == -1)
              || (compare_code == GE && cf == -1))
            {
              /*
               * notl op1       (if necessary)
               * sarl $31, op1
               * orl cf, op1
               */
              if (ct != -1)
                {
                  cf = ct;
                  ct = -1;
                  code = reverse_condition (code);
                }

              out = emit_store_flag (out, code, ix86_compare_op0,
                                     ix86_compare_op1, VOIDmode, 0, -1);

              out = expand_simple_binop (mode, IOR,
                                         out, GEN_INT (cf),
                                         out, 1, OPTAB_DIRECT);
              if (out != operands[0])
                emit_move_insn (operands[0], out);

              return 1; /* DONE */
            }
        }

      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
           || diff == 3 || diff == 5 || diff == 9)
          && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
          && (mode != DImode
              || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
        {
          /*
           * xorl dest,dest
           * cmpl op1,op2
           * setcc dest
           * lea cf(dest*(ct-cf)),dest
           *
           * Size 14.
           *
           * This also catches the degenerate setcc-only case.
           */

          rtx tmp;
          int nops;

          out = emit_store_flag (out, code, ix86_compare_op0,
                                 ix86_compare_op1, VOIDmode, 0, 1);

          nops = 0;
          /* On x86_64 the lea instruction operates on Pmode, so we need
             the arithmetic done in the proper mode to match.  */
          if (diff == 1)
            tmp = copy_rtx (out);
          else
            {
              rtx out1;
              out1 = copy_rtx (out);
              tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
              nops++;
              if (diff & 1)
                {
                  tmp = gen_rtx_PLUS (mode, tmp, out1);
                  nops++;
                }
            }
          if (cf != 0)
            {
              tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
              nops++;
            }
          if (!rtx_equal_p (tmp, out))
            {
              if (nops == 1)
                out = force_operand (tmp, copy_rtx (out));
              else
                emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
            }
          if (!rtx_equal_p (out, operands[0]))
            emit_move_insn (operands[0], copy_rtx (out));

          return 1; /* DONE */
        }

      /*
       * General case:                  Jumpful:
       *   xorl dest,dest               cmpl op1, op2
       *   cmpl op1, op2                movl ct, dest
       *   setcc dest                   jcc 1f
       *   decl dest                    movl cf, dest
       *   andl (cf-ct),dest            1:
       *   addl ct,dest
       *
       * Size 20.                       Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       */

      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
          && BRANCH_COST >= 2)
        {
          if (cf == 0)
            {
              cf = ct;
              ct = 0;
              if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
                /* We may be reversing an unordered compare to a normal
                   compare; that is not valid in general (we may convert a
                   non-trapping condition into a trapping one), but on i386
                   we currently emit all comparisons unordered.  */
                code = reverse_condition_maybe_unordered (code);
              else
                {
                  code = reverse_condition (code);
                  if (compare_code != UNKNOWN)
                    compare_code = reverse_condition (compare_code);
                }
            }

          if (compare_code != UNKNOWN)
            {
              /* notl op1       (if needed)
                 sarl $31, op1
                 andl (cf-ct), op1
                 addl ct, op1

                 For x < 0 (resp. x <= -1) there will be no notl,
                 so if possible swap the constants to get rid of the
                 complement.
                 True/false will be -1/0 while code below (store flag
                 followed by decrement) is 0/-1, so the constants need
                 to be exchanged once more.  */

              if (compare_code == GE || !cf)
                {
                  code = reverse_condition (code);
                  compare_code = LT;
                }
              else
                {
                  HOST_WIDE_INT tmp = cf;
                  cf = ct;
                  ct = tmp;
                }

              out = emit_store_flag (out, code, ix86_compare_op0,
                                     ix86_compare_op1, VOIDmode, 0, -1);
            }
          else
            {
              out = emit_store_flag (out, code, ix86_compare_op0,
                                     ix86_compare_op1, VOIDmode, 0, 1);

              out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
                                         copy_rtx (out), 1, OPTAB_DIRECT);
            }

          out = expand_simple_binop (mode, AND, copy_rtx (out),
                                     gen_int_mode (cf - ct, mode),
                                     copy_rtx (out), 1, OPTAB_DIRECT);
          if (ct)
            out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
                                       copy_rtx (out), 1, OPTAB_DIRECT);
          if (!rtx_equal_p (out, operands[0]))
            emit_move_insn (operands[0], copy_rtx (out));

          return 1; /* DONE */
        }
    }

  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
    {
      /* Try a few more things with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (BRANCH_COST <= 2)
        return 0; /* FAIL */

      /* If one of the two operands is an interesting constant, load the
         corresponding all-zeros/all-ones constant and mask the variable
         in with a logical operation.  */

      if (GET_CODE (operands[2]) == CONST_INT)
        {
          var = operands[3];
          if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
            operands[3] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
            operands[3] = const0_rtx, op = ior_optab;
          else
            return 0; /* FAIL */
        }
      else if (GET_CODE (operands[3]) == CONST_INT)
        {
          var = operands[2];
          if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
            operands[2] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
            operands[2] = const0_rtx, op = ior_optab;
          else
            return 0; /* FAIL */
        }
      else
        return 0; /* FAIL */

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
        return 0; /* FAIL */

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
                          OPTAB_WIDEN);
      if (!rtx_equal_p (out, orig_out))
        emit_move_insn (copy_rtx (orig_out), copy_rtx (out));

      return 1; /* DONE */
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      rtx tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  if (! register_operand (operands[2], VOIDmode)
      && (mode == QImode
          || ! register_operand (operands[3], VOIDmode)))
    operands[2] = force_reg (mode, operands[2]);

  if (mode == QImode
      && ! register_operand (operands[3], VOIDmode))
    operands[3] = force_reg (mode, operands[3]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode,
                                                compare_op, operands[2],
                                                operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
                            gen_rtx_IF_THEN_ELSE (mode,
                                  bypass_test,
                                  copy_rtx (operands[3]),
                                  copy_rtx (operands[0]))));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
                            gen_rtx_IF_THEN_ELSE (mode,
                                  second_test,
                                  copy_rtx (operands[2]),
                                  copy_rtx (operands[0]))));

  return 1; /* DONE */
}

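
              /* The movcc_0_m1 patterns below emit "sbb reg,reg": tmp
                 becomes all ones when the carry flag is set and zero
                 otherwise.  */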
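          /* A diff of 1, 2, 4 or 8 maps directly onto an lea scale;
             3, 5 and 9 become dest + dest*2/4/8, which is how the
             MULT/PLUS rtx below is constructed.  */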
/* Swap, force into registers, or otherwise massage the two operands
   to an sse comparison with a mask result.  Thus we differ a bit from
   ix86_prepare_fp_compare_args which expects to produce a flags result.

   The DEST operand exists to help determine whether to commute commutative
   operators.  The POP0/POP1 operands are updated in place.  The new
   comparison code is returned, or UNKNOWN if not implementable.  */

static enum rtx_code
ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
                                  rtx *pop0, rtx *pop1)
{
  rtx tmp;

  switch (code)
    {
    case LTGT:
    case UNEQ:
      /* We have no LTGT or UNEQ as operators.  LTGT could be implemented
         with NE & ORDERED (and UNEQ with EQ | UNORDERED), but this
         requires an extra temporary.  It's not clear that it's worth it.  */
      return UNKNOWN;

    case LT:
    case LE:
    case UNGT:
    case UNGE:
      /* These are supported directly.  */
      break;

    case EQ:
    case NE:
    case UNORDERED:
    case ORDERED:
      /* For commutative operators, try to canonicalize the destination
         operand to be first in the comparison - this helps reload to
         avoid extra moves.  */
      if (!dest || !rtx_equal_p (dest, *pop1))
        break;
      /* FALLTHRU */

    case GE:
    case GT:
    case UNLE:
    case UNLT:
      /* These are not supported directly.  Swap the comparison operands
         to transform into something that is supported.  */
      tmp = *pop0;
      *pop0 = *pop1;
      *pop1 = tmp;
      code = swap_condition (code);
      break;

    default:
      gcc_unreachable ();
    }

  return code;
}

/* Detect conditional moves that exactly match min/max operational
   semantics.  Note that this is IEEE safe, as long as we don't
   interchange the operands.

   Returns FALSE if this conditional move doesn't match a MIN/MAX,
   and TRUE if the operation is successful and instructions are emitted.  */

static bool
ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
                           rtx cmp_op1, rtx if_true, rtx if_false)
{
  enum machine_mode mode;
  bool is_min;
  rtx tmp;

  if (code == LT)
    ;
  else if (code == UNGE)
    {
      tmp = if_true;
      if_true = if_false;
      if_false = tmp;
    }
  else
    return false;

  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
    is_min = true;
  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
    is_min = false;
  else
    return false;

  mode = GET_MODE (dest);

  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
     but MODE may be a vector mode and thus not appropriate.  */
  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
    {
      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
      rtvec v;

      if_true = force_reg (mode, if_true);
      v = gen_rtvec (2, if_true, if_false);
      tmp = gen_rtx_UNSPEC (mode, v, u);
    }
  else
    {
      code = is_min ? SMIN : SMAX;
      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
    }

  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
  return true;
}

/* Expand an sse vector comparison.  Return the register with the result.  */

static rtx
ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
                     rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  rtx x;

  cmp_op0 = force_reg (mode, cmp_op0);
  if (!nonimmediate_operand (cmp_op1, mode))
    cmp_op1 = force_reg (mode, cmp_op1);

  if (optimize
      || reg_overlap_mentioned_p (dest, op_true)
      || reg_overlap_mentioned_p (dest, op_false))
    dest = gen_reg_rtx (mode);

  x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  return dest;
}

/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
   operations.  This is used for both scalar and vector conditional moves.  */

static void
ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  rtx t2, t3, x;

  if (op_false == CONST0_RTX (mode))
    {
      op_true = force_reg (mode, op_true);
      x = gen_rtx_AND (mode, cmp, op_true);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (op_true == CONST0_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      op_true = force_reg (mode, op_true);
      op_false = force_reg (mode, op_false);

      t2 = gen_reg_rtx (mode);
      if (optimize)
        t3 = gen_reg_rtx (mode);
      else
        t3 = dest;

      x = gen_rtx_AND (mode, op_true, cmp);
      emit_insn (gen_rtx_SET (VOIDmode, t2, x));

      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, t3, x));

      x = gen_rtx_IOR (mode, t3, t2);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}

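
      /* Compute dest = (cmp & op_true) | (~cmp & op_false).  */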
/* Expand a floating-point conditional move.  Return true if successful.  */

int
ix86_expand_fp_movcc (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp, compare_op, second_test, bypass_test;

  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    {
      enum machine_mode cmode;

      /* Since we've no cmove for sse registers, don't force bad register
         allocation just to gain access to it.  Deny movcc when the
         comparison mode doesn't match the move mode.  */
      cmode = GET_MODE (ix86_compare_op0);
      if (cmode == VOIDmode)
        cmode = GET_MODE (ix86_compare_op1);
      if (cmode != mode)
        return 0;

      code = ix86_prepare_sse_fp_compare_args (operands[0], code,
                                               &ix86_compare_op0,
                                               &ix86_compare_op1);
      if (code == UNKNOWN)
        return 0;

      if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
                                     ix86_compare_op1, operands[2],
                                     operands[3]))
        return 1;

      tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
                                 ix86_compare_op1, operands[2], operands[3]);
      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
      return 1;
    }

  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);

  /* The floating point conditional move instructions don't directly
     support signed integer comparisons.  */

  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      gcc_assert (!second_test && !bypass_test);
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
    }
  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[3]);
      operands[3] = tmp;
    }
  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
    {
      tmp = gen_reg_rtx (mode);
      emit_move_insn (tmp, operands[2]);
      operands[2] = tmp;
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode, compare_op,
                                                operands[2], operands[3])));
  if (bypass_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                            gen_rtx_IF_THEN_ELSE (mode, bypass_test,
                                                  operands[3], operands[0])));
  if (second_test)
    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                            gen_rtx_IF_THEN_ELSE (mode, second_test,
                                                  operands[2], operands[0])));

  return 1;
}

/* Expand a floating-point vector conditional move; a vcond operation
11614
   rather than a movcc operation.  */
11615
 
11616
bool
11617
ix86_expand_fp_vcond (rtx operands[])
11618
{
11619
  enum rtx_code code = GET_CODE (operands[3]);
11620
  rtx cmp;
11621
 
11622
  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11623
                                           &operands[4], &operands[5]);
11624
  if (code == UNKNOWN)
11625
    return false;
11626
 
11627
  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11628
                                 operands[5], operands[1], operands[2]))
11629
    return true;
11630
 
11631
  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11632
                             operands[1], operands[2]);
11633
  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11634
  return true;
11635
}
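/* A worked illustration of the vcond operand layout above (editorial
   sketch, not part of the original sources): for

       r = (a < b) ? x : y          in V4SFmode,

   the expander receives operands[0] = r, operands[1] = x,
   operands[2] = y, operands[3] = the LT comparison rtx, and
   operands[4] = a, operands[5] = b.  When the min/max shortcut does
   not apply, the emitted sequence is roughly

       mask = cmpltps a, b          ; all-ones lanes where a < b
       r    = (x & mask) | (y & ~mask)

   which is what ix86_expand_sse_cmp followed by
   ix86_expand_sse_movcc produce.  */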
11636
 
11637
/* Expand an integer vector conditional move.  */
11638
 
11639
bool
11640
ix86_expand_int_vcond (rtx operands[])
11641
{
11642
  enum machine_mode mode = GET_MODE (operands[0]);
11643
  enum rtx_code code = GET_CODE (operands[3]);
11644
  bool negate = false;
11645
  rtx x, cop0, cop1;
11646
 
11647
  cop0 = operands[4];
11648
  cop1 = operands[5];
11649
 
11650
  /* Canonicalize the comparison to EQ, GT, GTU.  */
11651
  switch (code)
11652
    {
11653
    case EQ:
11654
    case GT:
11655
    case GTU:
11656
      break;
11657
 
11658
    case NE:
11659
    case LE:
11660
    case LEU:
11661
      code = reverse_condition (code);
11662
      negate = true;
11663
      break;
11664
 
11665
    case GE:
11666
    case GEU:
11667
      code = reverse_condition (code);
11668
      negate = true;
11669
      /* FALLTHRU */
11670
 
11671
    case LT:
11672
    case LTU:
11673
      code = swap_condition (code);
11674
      x = cop0, cop0 = cop1, cop1 = x;
11675
      break;
11676
 
11677
    default:
11678
      gcc_unreachable ();
11679
    }
11680
 
11681
  /* Unsigned parallel compare is not supported by the hardware.  Play some
11682
     tricks to turn this into a signed comparison against 0.  */
11683
  if (code == GTU)
11684
    {
11685
      cop0 = force_reg (mode, cop0);
11686
 
11687
      switch (mode)
11688
        {
11689
        case V4SImode:
11690
          {
11691
            rtx t1, t2, mask;
11692
 
11693
            /* Perform a parallel modulo subtraction.  */
11694
            t1 = gen_reg_rtx (mode);
11695
            emit_insn (gen_subv4si3 (t1, cop0, cop1));
11696
 
11697
            /* Extract the original sign bit of op0.  */
11698
            mask = GEN_INT (-0x80000000);
11699
            mask = gen_rtx_CONST_VECTOR (mode,
11700
                        gen_rtvec (4, mask, mask, mask, mask));
11701
            mask = force_reg (mode, mask);
11702
            t2 = gen_reg_rtx (mode);
11703
            emit_insn (gen_andv4si3 (t2, cop0, mask));
11704
 
11705
            /* XOR it back into the result of the subtraction.  This results
11706
               in the sign bit set iff we saw unsigned underflow.  */
11707
            x = gen_reg_rtx (mode);
11708
            emit_insn (gen_xorv4si3 (x, t1, t2));
11709
 
11710
            code = GT;
11711
          }
11712
          break;
11713
 
11714
        case V16QImode:
11715
        case V8HImode:
11716
          /* Perform a parallel unsigned saturating subtraction.  */
11717
          x = gen_reg_rtx (mode);
11718
          emit_insn (gen_rtx_SET (VOIDmode, x,
11719
                                  gen_rtx_US_MINUS (mode, cop0, cop1)));
11720
 
11721
          code = EQ;
11722
          negate = !negate;
11723
          break;
11724
 
11725
        default:
11726
          gcc_unreachable ();
11727
        }
11728
 
11729
      cop0 = x;
11730
      cop1 = CONST0_RTX (mode);
11731
    }
11732
 
11733
  x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11734
                           operands[1+negate], operands[2-negate]);
11735
 
11736
  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11737
                         operands[2-negate]);
11738
  return true;
11739
}
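/* A worked example of the GTU trick above (editorial sketch, not part
   of the original sources): in V16QImode, x >u y is rewritten as
   NOT ((x -us y) == 0), where -us is the unsigned saturating
   subtraction (psubusb).  With single byte lanes:

       x = 200, y = 100:  x -us y = 100  ->  EQ mask false  ->  x >u y
       x = 100, y = 200:  x -us y = 0    ->  EQ mask true   ->  x <=u y

   The negation is folded into operand selection: negate flips which
   of operands[1]/operands[2] the mask picks, so no unsigned vector
   compare instruction is ever needed.  */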
11740
 
11741
/* Expand conditional increment or decrement using adc/sbb instructions.
11742
   The default case using setcc followed by the conditional move can be
11743
   done by generic code.  */
11744
int
11745
ix86_expand_int_addcc (rtx operands[])
11746
{
11747
  enum rtx_code code = GET_CODE (operands[1]);
11748
  rtx compare_op;
11749
  rtx val = const0_rtx;
11750
  bool fpcmp = false;
11751
  enum machine_mode mode = GET_MODE (operands[0]);
11752
 
11753
  if (operands[3] != const1_rtx
11754
      && operands[3] != constm1_rtx)
11755
    return 0;
11756
  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11757
                                       ix86_compare_op1, &compare_op))
11758
     return 0;
11759
  code = GET_CODE (compare_op);
11760
 
11761
  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11762
      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11763
    {
11764
      fpcmp = true;
11765
      code = ix86_fp_compare_code_to_integer (code);
11766
    }
11767
 
11768
  if (code != LTU)
11769
    {
11770
      val = constm1_rtx;
11771
      if (fpcmp)
11772
        PUT_CODE (compare_op,
11773
                  reverse_condition_maybe_unordered
11774
                    (GET_CODE (compare_op)));
11775
      else
11776
        PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11777
    }
11778
  PUT_MODE (compare_op, mode);
11779
 
11780
  /* Construct either adc or sbb insn.  */
11781
  if ((code == LTU) == (operands[3] == constm1_rtx))
11782
    {
11783
      switch (GET_MODE (operands[0]))
11784
        {
11785
          case QImode:
11786
            emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
11787
            break;
11788
          case HImode:
11789
            emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
11790
            break;
11791
          case SImode:
11792
            emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
11793
            break;
11794
          case DImode:
11795
            emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11796
            break;
11797
          default:
11798
            gcc_unreachable ();
11799
        }
11800
    }
11801
  else
11802
    {
11803
      switch (GET_MODE (operands[0]))
11804
        {
11805
          case QImode:
11806
            emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
11807
            break;
11808
          case HImode:
11809
            emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
11810
            break;
11811
          case SImode:
11812
            emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
11813
            break;
11814
          case DImode:
11815
            emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11816
            break;
11817
          default:
11818
            gcc_unreachable ();
11819
        }
11820
    }
11821
  return 1; /* DONE */
11822
}
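/* A minimal sketch of what the adc/sbb path buys (editorial note, not
   part of the original sources): for

       unsigned f (unsigned a, unsigned b, unsigned x)
       {
         return x + (a < b);
       }

   the unsigned comparison sets the carry flag directly, so instead of
   a setcc/movzx/add sequence the expander emits the equivalent of

       cmp  a, b        ; CF = (a <u b)
       adc  x, 0        ; x += CF

   i.e. gen_addsi3_carry with val == const0_rtx; the conditional
   decrement case maps onto sbb in the same way.  */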
11823
 
11824
 
11825
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
11826
   works for floating point parameters and non-offsettable memories.
11827
   For pushes, it returns just stack offsets; the values will be saved
11828
   in the right order.  At most three parts are generated.  */
11829
 
11830
static int
11831
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
11832
{
11833
  int size;
11834
 
11835
  if (!TARGET_64BIT)
11836
    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
11837
  else
11838
    size = (GET_MODE_SIZE (mode) + 4) / 8;
11839
 
11840
  gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
11841
  gcc_assert (size >= 2 && size <= 3);
11842
 
11843
  /* Optimize constant pool reference to immediates.  This is used by fp
11844
     moves, that force all constants to memory to allow combining.  */
11845
  if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
11846
    {
11847
      rtx tmp = maybe_get_pool_constant (operand);
11848
      if (tmp)
11849
        operand = tmp;
11850
    }
11851
 
11852
  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
11853
    {
11854
      /* The only non-offsettable memories we handle are pushes.  */
11855
      int ok = push_operand (operand, VOIDmode);
11856
 
11857
      gcc_assert (ok);
11858
 
11859
      operand = copy_rtx (operand);
11860
      PUT_MODE (operand, Pmode);
11861
      parts[0] = parts[1] = parts[2] = operand;
11862
      return size;
11863
    }
11864
 
11865
  if (GET_CODE (operand) == CONST_VECTOR)
11866
    {
11867
      enum machine_mode imode = int_mode_for_mode (mode);
11868
      /* Caution: if we looked through a constant pool memory above,
11869
         the operand may actually have a different mode now.  That's
11870
         ok, since we want to pun this all the way back to an integer.  */
11871
      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
11872
      gcc_assert (operand != NULL);
11873
      mode = imode;
11874
    }
11875
 
11876
  if (!TARGET_64BIT)
11877
    {
11878
      if (mode == DImode)
11879
        split_di (&operand, 1, &parts[0], &parts[1]);
11880
      else
11881
        {
11882
          if (REG_P (operand))
11883
            {
11884
              gcc_assert (reload_completed);
11885
              parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
11886
              parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
11887
              if (size == 3)
11888
                parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
11889
            }
11890
          else if (offsettable_memref_p (operand))
11891
            {
11892
              operand = adjust_address (operand, SImode, 0);
11893
              parts[0] = operand;
11894
              parts[1] = adjust_address (operand, SImode, 4);
11895
              if (size == 3)
11896
                parts[2] = adjust_address (operand, SImode, 8);
11897
            }
11898
          else if (GET_CODE (operand) == CONST_DOUBLE)
11899
            {
11900
              REAL_VALUE_TYPE r;
11901
              long l[4];
11902
 
11903
              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11904
              switch (mode)
11905
                {
11906
                case XFmode:
11907
                  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
11908
                  parts[2] = gen_int_mode (l[2], SImode);
11909
                  break;
11910
                case DFmode:
11911
                  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
11912
                  break;
11913
                default:
11914
                  gcc_unreachable ();
11915
                }
11916
              parts[1] = gen_int_mode (l[1], SImode);
11917
              parts[0] = gen_int_mode (l[0], SImode);
11918
            }
11919
          else
11920
            gcc_unreachable ();
11921
        }
11922
    }
11923
  else
11924
    {
11925
      if (mode == TImode)
11926
        split_ti (&operand, 1, &parts[0], &parts[1]);
11927
      if (mode == XFmode || mode == TFmode)
11928
        {
11929
          enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
11930
          if (REG_P (operand))
11931
            {
11932
              gcc_assert (reload_completed);
11933
              parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
11934
              parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
11935
            }
11936
          else if (offsettable_memref_p (operand))
11937
            {
11938
              operand = adjust_address (operand, DImode, 0);
11939
              parts[0] = operand;
11940
              parts[1] = adjust_address (operand, upper_mode, 8);
11941
            }
11942
          else if (GET_CODE (operand) == CONST_DOUBLE)
11943
            {
11944
              REAL_VALUE_TYPE r;
11945
              long l[4];
11946
 
11947
              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11948
              real_to_target (l, &r, mode);
11949
 
11950
              /* Do not use shift by 32 to avoid warning on 32bit systems.  */
11951
              if (HOST_BITS_PER_WIDE_INT >= 64)
11952
                parts[0]
11953
                  = gen_int_mode
11954
                      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
11955
                       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
11956
                       DImode);
11957
              else
11958
                parts[0] = immed_double_const (l[0], l[1], DImode);
11959
 
11960
              if (upper_mode == SImode)
11961
                parts[1] = gen_int_mode (l[2], SImode);
11962
              else if (HOST_BITS_PER_WIDE_INT >= 64)
11963
                parts[1]
11964
                  = gen_int_mode
11965
                      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
11966
                       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
11967
                       DImode);
11968
              else
11969
                parts[1] = immed_double_const (l[2], l[3], DImode);
11970
            }
11971
          else
11972
            gcc_unreachable ();
11973
        }
11974
    }
11975
 
11976
  return size;
11977
}
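/* A worked example (editorial note, not part of the original
   sources): on a 32-bit target, splitting the DFmode constant 1.0
   goes through REAL_VALUE_TO_TARGET_DOUBLE and yields two SImode
   immediates,

       parts[0] = 0x00000000    (low 32 bits of the IEEE image)
       parts[1] = 0x3ff00000    (high 32 bits)

   so the splitters can emit two plain SImode moves instead of one
   DFmode move through memory.  */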
11978
 
11979
/* Emit insns to perform a move or push of DI, DF, and XF values.
11980
   All required insns are emitted directly; nothing is returned.
11981
   Operands 2-4 receive the destination parts in the correct order;
11982
   operands 5-7 receive the corresponding source parts.  */
11983
 
11984
void
11985
ix86_split_long_move (rtx operands[])
11986
{
11987
  rtx part[2][3];
11988
  int nparts;
11989
  int push = 0;
11990
  int collisions = 0;
11991
  enum machine_mode mode = GET_MODE (operands[0]);
11992
 
11993
  /* The DFmode expanders may ask us to move a double.
11994
     On a 64-bit target this is a single move.  By hiding that fact
11995
     here we simplify the i386.md splitters.  */
11996
  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
11997
    {
11998
      /* Optimize constant pool reference to immediates.  This is used by
11999
         fp moves, that force all constants to memory to allow combining.  */
12000
 
12001
      if (GET_CODE (operands[1]) == MEM
12002
          && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12003
          && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12004
        operands[1] = get_pool_constant (XEXP (operands[1], 0));
12005
      if (push_operand (operands[0], VOIDmode))
12006
        {
12007
          operands[0] = copy_rtx (operands[0]);
12008
          PUT_MODE (operands[0], Pmode);
12009
        }
12010
      else
12011
        operands[0] = gen_lowpart (DImode, operands[0]);
12012
      operands[1] = gen_lowpart (DImode, operands[1]);
12013
      emit_move_insn (operands[0], operands[1]);
12014
      return;
12015
    }
12016
 
12017
  /* The only non-offsettable memory we handle is a push.  */
12018
  if (push_operand (operands[0], VOIDmode))
12019
    push = 1;
12020
  else
12021
    gcc_assert (GET_CODE (operands[0]) != MEM
12022
                || offsettable_memref_p (operands[0]));
12023
 
12024
  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12025
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12026
 
12027
  /* When emitting push, take care for source operands on the stack.  */
12028
  if (push && GET_CODE (operands[1]) == MEM
12029
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12030
    {
12031
      if (nparts == 3)
12032
        part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12033
                                     XEXP (part[1][2], 0));
12034
      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12035
                                   XEXP (part[1][1], 0));
12036
    }
12037
 
12038
  /* We need to do the copy in the right order in case an address register
12039
     of the source overlaps the destination.  */
12040
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
12041
    {
12042
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12043
        collisions++;
12044
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12045
        collisions++;
12046
      if (nparts == 3
12047
          && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12048
        collisions++;
12049
 
12050
      /* Collision in the middle part can be handled by reordering.  */
12051
      if (collisions == 1 && nparts == 3
12052
          && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12053
        {
12054
          rtx tmp;
12055
          tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12056
          tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12057
        }
12058
 
12059
      /* If there are more collisions, we can't handle it by reordering.
12060
         Do an lea to the last part and use only one colliding move.  */
12061
      else if (collisions > 1)
12062
        {
12063
          rtx base;
12064
 
12065
          collisions = 1;
12066
 
12067
          base = part[0][nparts - 1];
12068
 
12069
          /* Handle the case when the last part isn't valid for lea.
12070
             This happens in 64-bit mode when storing the 12-byte XFmode.  */
12071
          if (GET_MODE (base) != Pmode)
12072
            base = gen_rtx_REG (Pmode, REGNO (base));
12073
 
12074
          emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12075
          part[1][0] = replace_equiv_address (part[1][0], base);
12076
          part[1][1] = replace_equiv_address (part[1][1],
12077
                                      plus_constant (base, UNITS_PER_WORD));
12078
          if (nparts == 3)
12079
            part[1][2] = replace_equiv_address (part[1][2],
12080
                                      plus_constant (base, 8));
12081
        }
12082
    }
12083
 
12084
  if (push)
12085
    {
12086
      if (!TARGET_64BIT)
12087
        {
12088
          if (nparts == 3)
12089
            {
12090
              if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12091
                emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12092
              emit_move_insn (part[0][2], part[1][2]);
12093
            }
12094
        }
12095
      else
12096
        {
12097
          /* In 64-bit mode we don't have a 32-bit push available.  If this is
12098
             a register, that is fine - we just use the larger counterpart.  We
12099
             also retype the memory - the SImode references come from an attempt
12100
             to avoid a REX prefix when moving the second half of a TFmode value.  */
12101
          if (GET_MODE (part[1][1]) == SImode)
12102
            {
12103
              switch (GET_CODE (part[1][1]))
12104
                {
12105
                case MEM:
12106
                  part[1][1] = adjust_address (part[1][1], DImode, 0);
12107
                  break;
12108
 
12109
                case REG:
12110
                  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12111
                  break;
12112
 
12113
                default:
12114
                  gcc_unreachable ();
12115
                }
12116
 
12117
              if (GET_MODE (part[1][0]) == SImode)
12118
                part[1][0] = part[1][1];
12119
            }
12120
        }
12121
      emit_move_insn (part[0][1], part[1][1]);
12122
      emit_move_insn (part[0][0], part[1][0]);
12123
      return;
12124
    }
12125
 
12126
  /* Choose correct order to not overwrite the source before it is copied.  */
12127
  if ((REG_P (part[0][0])
12128
       && REG_P (part[1][1])
12129
       && (REGNO (part[0][0]) == REGNO (part[1][1])
12130
           || (nparts == 3
12131
               && REGNO (part[0][0]) == REGNO (part[1][2]))))
12132
      || (collisions > 0
12133
          && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12134
    {
12135
      if (nparts == 3)
12136
        {
12137
          operands[2] = part[0][2];
12138
          operands[3] = part[0][1];
12139
          operands[4] = part[0][0];
12140
          operands[5] = part[1][2];
12141
          operands[6] = part[1][1];
12142
          operands[7] = part[1][0];
12143
        }
12144
      else
12145
        {
12146
          operands[2] = part[0][1];
12147
          operands[3] = part[0][0];
12148
          operands[5] = part[1][1];
12149
          operands[6] = part[1][0];
12150
        }
12151
    }
12152
  else
12153
    {
12154
      if (nparts == 3)
12155
        {
12156
          operands[2] = part[0][0];
12157
          operands[3] = part[0][1];
12158
          operands[4] = part[0][2];
12159
          operands[5] = part[1][0];
12160
          operands[6] = part[1][1];
12161
          operands[7] = part[1][2];
12162
        }
12163
      else
12164
        {
12165
          operands[2] = part[0][0];
12166
          operands[3] = part[0][1];
12167
          operands[5] = part[1][0];
12168
          operands[6] = part[1][1];
12169
        }
12170
    }
12171
 
12172
  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
12173
  if (optimize_size)
12174
    {
12175
      if (GET_CODE (operands[5]) == CONST_INT
12176
          && operands[5] != const0_rtx
12177
          && REG_P (operands[2]))
12178
        {
12179
          if (GET_CODE (operands[6]) == CONST_INT
12180
              && INTVAL (operands[6]) == INTVAL (operands[5]))
12181
            operands[6] = operands[2];
12182
 
12183
          if (nparts == 3
12184
              && GET_CODE (operands[7]) == CONST_INT
12185
              && INTVAL (operands[7]) == INTVAL (operands[5]))
12186
            operands[7] = operands[2];
12187
        }
12188
 
12189
      if (nparts == 3
12190
          && GET_CODE (operands[6]) == CONST_INT
12191
          && operands[6] != const0_rtx
12192
          && REG_P (operands[3])
12193
          && GET_CODE (operands[7]) == CONST_INT
12194
          && INTVAL (operands[7]) == INTVAL (operands[6]))
12195
        operands[7] = operands[3];
12196
    }
12197
 
12198
  emit_move_insn (operands[2], operands[5]);
12199
  emit_move_insn (operands[3], operands[6]);
12200
  if (nparts == 3)
12201
    emit_move_insn (operands[4], operands[7]);
12202
 
12203
  return;
12204
}
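/* A worked example of the ordering logic above (editorial note, not
   part of the original sources): for the 32-bit DImode load

       edx:eax = *(long long *) eax

   the destination's low part is eax, which is also the source address
   register, so copying the low word first would destroy the address
   before the high word is read.  The collision check detects the
   overlap and reverses the schedule: 4(%eax) is loaded into edx
   first, and (%eax) into eax last.  */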
12205
 
12206
/* Helper function of ix86_split_ashl used to generate an SImode/DImode
12207
   left shift by a constant, either using a single shift or
12208
   a sequence of add instructions.  */
12209
 
12210
static void
12211
ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12212
{
12213
  if (count == 1)
12214
    {
12215
      emit_insn ((mode == DImode
12216
                  ? gen_addsi3
12217
                  : gen_adddi3) (operand, operand, operand));
12218
    }
12219
  else if (!optimize_size
12220
           && count * ix86_cost->add <= ix86_cost->shift_const)
12221
    {
12222
      int i;
12223
      for (i=0; i<count; i++)
12224
        {
12225
          emit_insn ((mode == DImode
12226
                      ? gen_addsi3
12227
                      : gen_adddi3) (operand, operand, operand));
12228
        }
12229
    }
12230
  else
12231
    emit_insn ((mode == DImode
12232
                ? gen_ashlsi3
12233
                : gen_ashldi3) (operand, operand, GEN_INT (count)));
12234
}
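/* A small illustration of the heuristic above (editorial note, not
   part of the original sources): count == 1 always becomes a single
   "add x, x", a left shift by one that needs no count operand.  For
   larger counts the expander compares count * ix86_cost->add against
   ix86_cost->shift_const, so e.g. a shift by 3 becomes three adds
   only on a tuning where three additions are no more expensive than
   one immediate shift.  */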
12235
 
12236
void
12237
ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12238
{
12239
  rtx low[2], high[2];
12240
  int count;
12241
  const int single_width = mode == DImode ? 32 : 64;
12242
 
12243
  if (GET_CODE (operands[2]) == CONST_INT)
12244
    {
12245
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12246
      count = INTVAL (operands[2]) & (single_width * 2 - 1);
12247
 
12248
      if (count >= single_width)
12249
        {
12250
          emit_move_insn (high[0], low[1]);
12251
          emit_move_insn (low[0], const0_rtx);
12252
 
12253
          if (count > single_width)
12254
            ix86_expand_ashl_const (high[0], count - single_width, mode);
12255
        }
12256
      else
12257
        {
12258
          if (!rtx_equal_p (operands[0], operands[1]))
12259
            emit_move_insn (operands[0], operands[1]);
12260
          emit_insn ((mode == DImode
12261
                     ? gen_x86_shld_1
12262
                     : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12263
          ix86_expand_ashl_const (low[0], count, mode);
12264
        }
12265
      return;
12266
    }
12267
 
12268
  (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12269
 
12270
  if (operands[1] == const1_rtx)
12271
    {
12272
      /* Assuming we've chosen QImode-capable registers, 1 << N
12273
         can be done with two 32/64-bit shifts, no branches, no cmoves.  */
12274
      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12275
        {
12276
          rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12277
 
12278
          ix86_expand_clear (low[0]);
12279
          ix86_expand_clear (high[0]);
12280
          emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12281
 
12282
          d = gen_lowpart (QImode, low[0]);
12283
          d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12284
          s = gen_rtx_EQ (QImode, flags, const0_rtx);
12285
          emit_insn (gen_rtx_SET (VOIDmode, d, s));
12286
 
12287
          d = gen_lowpart (QImode, high[0]);
12288
          d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12289
          s = gen_rtx_NE (QImode, flags, const0_rtx);
12290
          emit_insn (gen_rtx_SET (VOIDmode, d, s));
12291
        }
12292
 
12293
      /* Otherwise, we can get the same results by manually performing
12294
         a bit extract operation on bit 5/6, and then performing the two
12295
         shifts.  The two methods of getting 0/1 into low/high are exactly
12296
         the same size.  Avoiding the shift in the bit extract case helps
12297
         pentium4 a bit; no one else seems to care much either way.  */
12298
      else
12299
        {
12300
          rtx x;
12301
 
12302
          if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12303
            x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12304
          else
12305
            x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12306
          emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12307
 
12308
          emit_insn ((mode == DImode
12309
                      ? gen_lshrsi3
12310
                      : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12311
          emit_insn ((mode == DImode
12312
                      ? gen_andsi3
12313
                      : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12314
          emit_move_insn (low[0], high[0]);
12315
          emit_insn ((mode == DImode
12316
                      ? gen_xorsi3
12317
                      : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12318
        }
12319
 
12320
      emit_insn ((mode == DImode
12321
                    ? gen_ashlsi3
12322
                    : gen_ashldi3) (low[0], low[0], operands[2]));
12323
      emit_insn ((mode == DImode
12324
                    ? gen_ashlsi3
12325
                    : gen_ashldi3) (high[0], high[0], operands[2]));
12326
      return;
12327
    }
12328
 
12329
  if (operands[1] == constm1_rtx)
12330
    {
12331
      /* For -1 << N, we can avoid the shld instruction, because we
12332
         know that we're shifting 0...31/63 ones into a -1.  */
12333
      emit_move_insn (low[0], constm1_rtx);
12334
      if (optimize_size)
12335
        emit_move_insn (high[0], low[0]);
12336
      else
12337
        emit_move_insn (high[0], constm1_rtx);
12338
    }
12339
  else
12340
    {
12341
      if (!rtx_equal_p (operands[0], operands[1]))
12342
        emit_move_insn (operands[0], operands[1]);
12343
 
12344
      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12345
      emit_insn ((mode == DImode
12346
                  ? gen_x86_shld_1
12347
                  : gen_x86_64_shld) (high[0], low[0], operands[2]));
12348
    }
12349
 
12350
  emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12351
 
12352
  if (TARGET_CMOVE && scratch)
12353
    {
12354
      ix86_expand_clear (scratch);
12355
      emit_insn ((mode == DImode
12356
                  ? gen_x86_shift_adj_1
12357
                  : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12358
    }
12359
  else
12360
    emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12361
}
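/* A worked example (editorial note, not part of the original
   sources): on a 32-bit target, DImode << 40 takes the
   count >= single_width path above and becomes

       mov  high, low        ; the low word shifts into the high word
       xor  low, low
       shl  high, 8          ; the remaining 40 - 32 bits

   (the final shift may itself become adds per the cost check).  A
   variable count instead goes through shld/sal plus the shift
   adjustment pattern that fixes up counts >= 32 at run time.  */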
12362
 
12363
void
12364
ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12365
{
12366
  rtx low[2], high[2];
12367
  int count;
12368
  const int single_width = mode == DImode ? 32 : 64;
12369
 
12370
  if (GET_CODE (operands[2]) == CONST_INT)
12371
    {
12372
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12373
      count = INTVAL (operands[2]) & (single_width * 2 - 1);
12374
 
12375
      if (count == single_width * 2 - 1)
12376
        {
12377
          emit_move_insn (high[0], high[1]);
12378
          emit_insn ((mode == DImode
12379
                      ? gen_ashrsi3
12380
                      : gen_ashrdi3) (high[0], high[0],
12381
                                      GEN_INT (single_width - 1)));
12382
          emit_move_insn (low[0], high[0]);
12383
 
12384
        }
12385
      else if (count >= single_width)
12386
        {
12387
          emit_move_insn (low[0], high[1]);
12388
          emit_move_insn (high[0], low[0]);
12389
          emit_insn ((mode == DImode
12390
                      ? gen_ashrsi3
12391
                      : gen_ashrdi3) (high[0], high[0],
12392
                                      GEN_INT (single_width - 1)));
12393
          if (count > single_width)
12394
            emit_insn ((mode == DImode
12395
                        ? gen_ashrsi3
12396
                        : gen_ashrdi3) (low[0], low[0],
12397
                                        GEN_INT (count - single_width)));
12398
        }
12399
      else
12400
        {
12401
          if (!rtx_equal_p (operands[0], operands[1]))
12402
            emit_move_insn (operands[0], operands[1]);
12403
          emit_insn ((mode == DImode
12404
                      ? gen_x86_shrd_1
12405
                      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12406
          emit_insn ((mode == DImode
12407
                      ? gen_ashrsi3
12408
                      : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12409
        }
12410
    }
12411
  else
12412
    {
12413
      if (!rtx_equal_p (operands[0], operands[1]))
12414
        emit_move_insn (operands[0], operands[1]);
12415
 
12416
      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12417
 
12418
      emit_insn ((mode == DImode
12419
                  ? gen_x86_shrd_1
12420
                  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12421
      emit_insn ((mode == DImode
12422
                  ? gen_ashrsi3
12423
                  : gen_ashrdi3)  (high[0], high[0], operands[2]));
12424
 
12425
      if (TARGET_CMOVE && scratch)
12426
        {
12427
          emit_move_insn (scratch, high[0]);
12428
          emit_insn ((mode == DImode
12429
                      ? gen_ashrsi3
12430
                      : gen_ashrdi3) (scratch, scratch,
12431
                                      GEN_INT (single_width - 1)));
12432
          emit_insn ((mode == DImode
12433
                      ? gen_x86_shift_adj_1
12434
                      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12435
                                         scratch));
12436
        }
12437
      else
12438
        emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12439
    }
12440
}
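/* A worked example (editorial note, not part of the original
   sources): a 32-bit arithmetic right shift of DImode by 63 hits the
   count == single_width * 2 - 1 special case above and reduces to

       mov  high, src_high
       sar  high, 31         ; the high word becomes a copy of the sign
       mov  low, high        ; and so does the low word

   since shifting by the maximal count leaves nothing but the
   replicated sign bit.  */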
12441
 
12442
void
12443
ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12444
{
12445
  rtx low[2], high[2];
12446
  int count;
12447
  const int single_width = mode == DImode ? 32 : 64;
12448
 
12449
  if (GET_CODE (operands[2]) == CONST_INT)
12450
    {
12451
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12452
      count = INTVAL (operands[2]) & (single_width * 2 - 1);
12453
 
12454
      if (count >= single_width)
12455
        {
12456
          emit_move_insn (low[0], high[1]);
12457
          ix86_expand_clear (high[0]);
12458
 
12459
          if (count > single_width)
12460
            emit_insn ((mode == DImode
12461
                        ? gen_lshrsi3
12462
                        : gen_lshrdi3) (low[0], low[0],
12463
                                        GEN_INT (count - single_width)));
12464
        }
12465
      else
12466
        {
12467
          if (!rtx_equal_p (operands[0], operands[1]))
12468
            emit_move_insn (operands[0], operands[1]);
12469
          emit_insn ((mode == DImode
12470
                      ? gen_x86_shrd_1
12471
                      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12472
          emit_insn ((mode == DImode
12473
                      ? gen_lshrsi3
12474
                      : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12475
        }
12476
    }
12477
  else
12478
    {
12479
      if (!rtx_equal_p (operands[0], operands[1]))
12480
        emit_move_insn (operands[0], operands[1]);
12481
 
12482
      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12483
 
12484
      emit_insn ((mode == DImode
12485
                  ? gen_x86_shrd_1
12486
                  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12487
      emit_insn ((mode == DImode
12488
                  ? gen_lshrsi3
12489
                  : gen_lshrdi3) (high[0], high[0], operands[2]));
12490
 
12491
      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
12492
      if (TARGET_CMOVE && scratch)
12493
        {
12494
          ix86_expand_clear (scratch);
12495
          emit_insn ((mode == DImode
12496
                      ? gen_x86_shift_adj_1
12497
                      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12498
                                               scratch));
12499
        }
12500
      else
12501
        emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12502
    }
12503
}
12504
 
12505
/* Helper function for the string operations below.  Test whether VARIABLE
12507
   is aligned to VALUE bytes; if so, the emitted code jumps to the returned label.  */
12507
static rtx
12508
ix86_expand_aligntest (rtx variable, int value)
12509
{
12510
  rtx label = gen_label_rtx ();
12511
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12512
  if (GET_MODE (variable) == DImode)
12513
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12514
  else
12515
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12516
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12517
                           1, label);
12518
  return label;
12519
}
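/* Usage sketch (editorial note, not part of the original sources):
   the callers below pair the returned label with a single fix-up
   move, e.g.

       label = ix86_expand_aligntest (destreg, 1);
       ... copy one byte and adjust the counter ...
       emit_label (label);

   The emitted branch is taken when (destreg & 1) == 0, so the byte
   fix-up runs only when the pointer is actually odd.  */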
12520
 
12521
/* Decrement COUNTREG by VALUE.  */
12522
static void
12523
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12524
{
12525
  if (GET_MODE (countreg) == DImode)
12526
    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12527
  else
12528
    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12529
}
12530
 
12531
/* Zero extend EXP, which may be SImode, to a Pmode register.  */
12532
rtx
12533
ix86_zero_extend_to_Pmode (rtx exp)
12534
{
12535
  rtx r;
12536
  if (GET_MODE (exp) == VOIDmode)
12537
    return force_reg (Pmode, exp);
12538
  if (GET_MODE (exp) == Pmode)
12539
    return copy_to_mode_reg (Pmode, exp);
12540
  r = gen_reg_rtx (Pmode);
12541
  emit_insn (gen_zero_extendsidi2 (r, exp));
12542
  return r;
12543
}
12544
 
12545
/* Expand string move (memcpy) operation.  Use i386 string operations when
12546
   profitable.  expand_clrmem contains similar code.  */
12547
int
12548
ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12549
{
12550
  rtx srcreg, destreg, countreg, srcexp, destexp;
12551
  enum machine_mode counter_mode;
12552
  HOST_WIDE_INT align = 0;
12553
  unsigned HOST_WIDE_INT count = 0;
12554
 
12555
  if (GET_CODE (align_exp) == CONST_INT)
12556
    align = INTVAL (align_exp);
12557
 
12558
  /* Can't use any of this if the user has appropriated esi or edi.  */
12559
  if (global_regs[4] || global_regs[5])
12560
    return 0;
12561
 
12562
  /* This simple hack avoids all inlining code and simplifies code below.  */
12563
  if (!TARGET_ALIGN_STRINGOPS)
12564
    align = 64;
12565
 
12566
  if (GET_CODE (count_exp) == CONST_INT)
12567
    {
12568
      count = INTVAL (count_exp);
12569
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12570
        return 0;
12571
    }
12572
 
12573
  /* Figure out proper mode for counter.  For 32bits it is always SImode,
12574
     for 64bits use SImode when possible, otherwise DImode.
12575
     Set count to number of bytes copied when known at compile time.  */
12576
  if (!TARGET_64BIT
12577
      || GET_MODE (count_exp) == SImode
12578
      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12579
    counter_mode = SImode;
12580
  else
12581
    counter_mode = DImode;
12582
 
12583
  gcc_assert (counter_mode == SImode || counter_mode == DImode);
12584
 
12585
  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12586
  if (destreg != XEXP (dst, 0))
12587
    dst = replace_equiv_address_nv (dst, destreg);
12588
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12589
  if (srcreg != XEXP (src, 0))
12590
    src = replace_equiv_address_nv (src, srcreg);
12591
 
12592
  /* When optimizing for size emit a simple rep; movsb instruction for
12593
     counts not divisible by 4, except when the (movsl;)*(movsw;)?(movsb;)?
12594
     sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12595
     The size of the (movsl;)*(movsw;)?(movsb;)? sequence is
12596
     count / 4 + (count & 3) bytes, the other sequence is either 4 or 7
12597
     bytes, but we don't know whether the upper 24 (resp. 56) bits of
12598
     %ecx will be known to be zero or not.  The rep; movsb sequence
12599
     causes higher register pressure though, so take that into account.  */
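  /* A quick arithmetic check of that tradeoff (editorial note, not
     part of the original sources): for count == 35 the movs sequence
     costs count / 4 + (count & 3) == 8 + 3 == 11 bytes and is beaten
     by the 4-7 byte mov $35, %ecx; rep; movsb, so the rep form wins;
     for count == 7 it is only 1 + 3 == 4 bytes, so the discrete
     movsl/movsb sequence is used instead.  */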
12600
 
12601
  if ((!optimize || optimize_size)
12602
      && (count == 0
12603
          || ((count & 0x03)
12604
              && (!optimize_size
12605
                  || count > 5 * 4
12606
                  || (count & 3) + count / 4 > 6))))
12607
    {
12608
      emit_insn (gen_cld ());
12609
      countreg = ix86_zero_extend_to_Pmode (count_exp);
12610
      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12611
      srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12612
      emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12613
                              destexp, srcexp));
12614
    }
12615
 
12616
  /* For constant aligned (or small unaligned) copies use rep movsl
12617
     followed by code copying the rest.  For PentiumPro ensure 8 byte
12618
     alignment to allow rep movsl acceleration.  */
12619
 
12620
  else if (count != 0
12621
           && (align >= 8
12622
               || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12623
               || optimize_size || count < (unsigned int) 64))
12624
    {
12625
      unsigned HOST_WIDE_INT offset = 0;
12626
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12627
      rtx srcmem, dstmem;
12628
 
12629
      emit_insn (gen_cld ());
12630
      if (count & ~(size - 1))
12631
        {
12632
          if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12633
            {
12634
              enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12635
 
12636
              while (offset < (count & ~(size - 1)))
12637
                {
12638
                  srcmem = adjust_automodify_address_nv (src, movs_mode,
12639
                                                         srcreg, offset);
12640
                  dstmem = adjust_automodify_address_nv (dst, movs_mode,
12641
                                                         destreg, offset);
12642
                  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12643
                  offset += size;
12644
                }
12645
            }
12646
          else
12647
            {
12648
              countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12649
                                  & (TARGET_64BIT ? -1 : 0x3fffffff));
12650
              countreg = copy_to_mode_reg (counter_mode, countreg);
12651
              countreg = ix86_zero_extend_to_Pmode (countreg);
12652
 
12653
              destexp = gen_rtx_ASHIFT (Pmode, countreg,
12654
                                        GEN_INT (size == 4 ? 2 : 3));
12655
              srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12656
              destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12657
 
12658
              emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12659
                                      countreg, destexp, srcexp));
12660
              offset = count & ~(size - 1);
12661
            }
12662
        }
12663
      if (size == 8 && (count & 0x04))
12664
        {
12665
          srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
12666
                                                 offset);
12667
          dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
12668
                                                 offset);
12669
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12670
          offset += 4;
12671
        }
12672
      if (count & 0x02)
12673
        {
12674
          srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
12675
                                                 offset);
12676
          dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
12677
                                                 offset);
12678
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12679
          offset += 2;
12680
        }
12681
      if (count & 0x01)
12682
        {
12683
          srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
12684
                                                 offset);
12685
          dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
12686
                                                 offset);
12687
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12688
        }
12689
    }
12690
  /* The generic code based on the glibc implementation:
12691
     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12692
     allowing accelerated copying there)
12693
     - copy the data using rep movsl
12694
     - copy the rest.  */
12695
  else
12696
    {
12697
      rtx countreg2;
12698
      rtx label = NULL;
12699
      rtx srcmem, dstmem;
12700
      int desired_alignment = (TARGET_PENTIUMPRO
12701
                               && (count == 0 || count >= (unsigned int) 260)
12702
                               ? 8 : UNITS_PER_WORD);
12703
      /* Get rid of MEM_OFFSETs, they won't be accurate.  */
12704
      dst = change_address (dst, BLKmode, destreg);
12705
      src = change_address (src, BLKmode, srcreg);
12706
 
12707
      /* In case we don't know anything about the alignment, default to
12708
         the library version, since it is usually equally fast and results in
12709
         shorter code.
12710
 
12711
         Also emit a call when we know that the count is large and call overhead
12712
         will not be important.  */
12713
      if (!TARGET_INLINE_ALL_STRINGOPS
12714
          && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12715
        return 0;
12716
 
12717
      if (TARGET_SINGLE_STRINGOP)
12718
        emit_insn (gen_cld ());
12719
 
12720
      countreg2 = gen_reg_rtx (Pmode);
12721
      countreg = copy_to_mode_reg (counter_mode, count_exp);
12722
 
12723
      /* We don't use loops to align destination and to copy parts smaller
12724
         than 4 bytes, because gcc is able to optimize such code better (in
12725
         the case the destination or the count really is aligned, gcc is often
12726
         able to predict the branches) and also it is friendlier to the
12727
         hardware branch prediction.
12728
 
12729
         Using loops is beneficial for the generic case, because we can
12730
         handle small counts using the loops.  Many CPUs (such as Athlon)
12731
         have large REP prefix setup costs.
12732
 
12733
         This is quite costly.  Maybe we can revisit this decision later or
12734
         add some customizability to this code.  */
12735
 
12736
      if (count == 0 && align < desired_alignment)
12737
        {
12738
          label = gen_label_rtx ();
12739
          emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12740
                                   LEU, 0, counter_mode, 1, label);
12741
        }
12742
      if (align <= 1)
12743
        {
12744
          rtx label = ix86_expand_aligntest (destreg, 1);
12745
          srcmem = change_address (src, QImode, srcreg);
12746
          dstmem = change_address (dst, QImode, destreg);
12747
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12748
          ix86_adjust_counter (countreg, 1);
12749
          emit_label (label);
12750
          LABEL_NUSES (label) = 1;
12751
        }
12752
      if (align <= 2)
12753
        {
12754
          rtx label = ix86_expand_aligntest (destreg, 2);
12755
          srcmem = change_address (src, HImode, srcreg);
12756
          dstmem = change_address (dst, HImode, destreg);
12757
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12758
          ix86_adjust_counter (countreg, 2);
12759
          emit_label (label);
12760
          LABEL_NUSES (label) = 1;
12761
        }
12762
      if (align <= 4 && desired_alignment > 4)
12763
        {
12764
          rtx label = ix86_expand_aligntest (destreg, 4);
12765
          srcmem = change_address (src, SImode, srcreg);
12766
          dstmem = change_address (dst, SImode, destreg);
12767
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12768
          ix86_adjust_counter (countreg, 4);
12769
          emit_label (label);
12770
          LABEL_NUSES (label) = 1;
12771
        }
12772
 
12773
      if (label && desired_alignment > 4 && !TARGET_64BIT)
12774
        {
12775
          emit_label (label);
12776
          LABEL_NUSES (label) = 1;
12777
          label = NULL_RTX;
12778
        }
12779
      if (!TARGET_SINGLE_STRINGOP)
12780
        emit_insn (gen_cld ());
12781
      if (TARGET_64BIT)
12782
        {
12783
          emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12784
                                  GEN_INT (3)));
12785
          destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12786
        }
12787
      else
12788
        {
12789
          emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12790
          destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12791
        }
12792
      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12793
      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12794
      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12795
                              countreg2, destexp, srcexp));
12796
 
12797
      if (label)
12798
        {
12799
          emit_label (label);
12800
          LABEL_NUSES (label) = 1;
12801
        }
12802
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12803
        {
12804
          srcmem = change_address (src, SImode, srcreg);
12805
          dstmem = change_address (dst, SImode, destreg);
12806
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12807
        }
12808
      if ((align <= 4 || count == 0) && TARGET_64BIT)
12809
        {
12810
          rtx label = ix86_expand_aligntest (countreg, 4);
12811
          srcmem = change_address (src, SImode, srcreg);
12812
          dstmem = change_address (dst, SImode, destreg);
12813
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12814
          emit_label (label);
12815
          LABEL_NUSES (label) = 1;
12816
        }
12817
      if (align > 2 && count != 0 && (count & 2))
12818
        {
12819
          srcmem = change_address (src, HImode, srcreg);
12820
          dstmem = change_address (dst, HImode, destreg);
12821
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12822
        }
12823
      if (align <= 2 || count == 0)
12824
        {
12825
          rtx label = ix86_expand_aligntest (countreg, 2);
12826
          srcmem = change_address (src, HImode, srcreg);
12827
          dstmem = change_address (dst, HImode, destreg);
12828
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12829
          emit_label (label);
12830
          LABEL_NUSES (label) = 1;
12831
        }
12832
      if (align > 1 && count != 0 && (count & 1))
12833
        {
12834
          srcmem = change_address (src, QImode, srcreg);
12835
          dstmem = change_address (dst, QImode, destreg);
12836
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12837
        }
12838
      if (align <= 1 || count == 0)
12839
        {
12840
          rtx label = ix86_expand_aligntest (countreg, 1);
12841
          srcmem = change_address (src, QImode, srcreg);
12842
          dstmem = change_address (dst, QImode, destreg);
12843
          emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12844
          emit_label (label);
12845
          LABEL_NUSES (label) = 1;
12846
        }
12847
    }
12848
 
12849
  return 1;
12850
}
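/* A worked example of the constant-count path (editorial note, not
   part of the original sources): on a 32-bit -O2 target with
   TARGET_SINGLE_STRINGOP, a memcpy of 10 bytes with 4-byte alignment
   takes the small-count branch and emits

       movsl                 ; bytes 0-3
       movsl                 ; bytes 4-7
       movsw                 ; bytes 8-9   (count & 0x02)

   with no rep prefix and no trailing movsb, since count & 0x01 == 0.  */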
12851
 
12852
/* Expand string clear operation (bzero).  Use i386 string operations when
12853
   profitable.  expand_movmem contains similar code.  */
12854
int
12855
ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
12856
{
12857
  rtx destreg, zeroreg, countreg, destexp;
12858
  enum machine_mode counter_mode;
12859
  HOST_WIDE_INT align = 0;
12860
  unsigned HOST_WIDE_INT count = 0;
12861
 
12862
  if (GET_CODE (align_exp) == CONST_INT)
12863
    align = INTVAL (align_exp);
12864
 
12865
  /* Can't use any of this if the user has appropriated esi.  */
12866
  if (global_regs[4])
12867
    return 0;
12868
 
12869
  /* This simple hack avoids all inlining code and simplifies code below.  */
12870
  if (!TARGET_ALIGN_STRINGOPS)
12871
    align = 32;
12872
 
12873
  if (GET_CODE (count_exp) == CONST_INT)
12874
    {
12875
      count = INTVAL (count_exp);
12876
      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12877
        return 0;
12878
    }
12879
  /* Figure out proper mode for counter.  For 32bits it is always SImode,
12880
     for 64bits use SImode when possible, otherwise DImode.
12881
     Set count to number of bytes copied when known at compile time.  */
12882
  if (!TARGET_64BIT
12883
      || GET_MODE (count_exp) == SImode
12884
      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12885
    counter_mode = SImode;
12886
  else
12887
    counter_mode = DImode;
12888
 
12889
  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12890
  if (destreg != XEXP (dst, 0))
12891
    dst = replace_equiv_address_nv (dst, destreg);
12892
 
12893
 
12894
  /* When optimizing for size emit a simple rep; stosb instruction for
12895
     counts not divisible by 4.  The movl $N, %ecx; rep; stosb
12896
     sequence is 7 bytes long, so if optimizing for size and count is
12897
     small enough that some stosl, stosw and stosb instructions without
12898
     rep are shorter, fall back into the next if.  */
12899
 
12900
  if ((!optimize || optimize_size)
12901
      && (count == 0
12902
          || ((count & 0x03)
12903
              && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
12904
    {
12905
      emit_insn (gen_cld ());
12906
 
12907
      countreg = ix86_zero_extend_to_Pmode (count_exp);
12908
      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
12909
      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12910
      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
12911
    }
12912
  else if (count != 0
12913
           && (align >= 8
12914
               || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12915
               || optimize_size || count < (unsigned int) 64))
12916
    {
12917
      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12918
      unsigned HOST_WIDE_INT offset = 0;
12919
 
12920
      emit_insn (gen_cld ());
12921
 
12922
      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
12923
      if (count & ~(size - 1))
12924
        {
12925
          unsigned HOST_WIDE_INT repcount;
12926
          unsigned int max_nonrep;
12927
 
12928
          repcount = count >> (size == 4 ? 2 : 3);
12929
          if (!TARGET_64BIT)
12930
            repcount &= 0x3fffffff;
12931
 
12932
          /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
12933
             movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
12934
             bytes.  In both cases the latter seems to be faster for small
12935
             values of N.  */
12936
          max_nonrep = size == 4 ? 7 : 4;
12937
          if (!optimize_size)
12938
            switch (ix86_tune)
12939
              {
12940
              case PROCESSOR_PENTIUM4:
12941
              case PROCESSOR_NOCONA:
12942
                max_nonrep = 3;
12943
                break;
12944
              default:
12945
                break;
12946
              }
12947
 
12948
          if (repcount <= max_nonrep)
12949
            while (repcount-- > 0)
12950
              {
12951
                rtx mem = adjust_automodify_address_nv (dst,
12952
                                                        GET_MODE (zeroreg),
12953
                                                        destreg, offset);
12954
                emit_insn (gen_strset (destreg, mem, zeroreg));
12955
                offset += size;
12956
              }
12957
          else
12958
            {
12959
              countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
12960
              countreg = ix86_zero_extend_to_Pmode (countreg);
12961
              destexp = gen_rtx_ASHIFT (Pmode, countreg,
12962
                                        GEN_INT (size == 4 ? 2 : 3));
12963
              destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12964
              emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
12965
                                       destexp));
12966
              offset = count & ~(size - 1);
12967
            }
12968
        }
12969
      if (size == 8 && (count & 0x04))
12970
        {
12971
          rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
12972
                                                  offset);
12973
          emit_insn (gen_strset (destreg, mem,
12974
                                 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12975
          offset += 4;
12976
        }
12977
      if (count & 0x02)
12978
        {
12979
          rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
12980
                                                  offset);
12981
          emit_insn (gen_strset (destreg, mem,
12982
                                 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12983
          offset += 2;
12984
        }
12985
      if (count & 0x01)
12986
        {
12987
          rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
12988
                                                  offset);
12989
          emit_insn (gen_strset (destreg, mem,
12990
                                 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12991
        }
12992
    }
12993
  else
12994
    {
12995
      rtx countreg2;
12996
      rtx label = NULL;
12997
      /* Compute desired alignment of the string operation.  */
12998
      int desired_alignment = (TARGET_PENTIUMPRO
12999
                               && (count == 0 || count >= (unsigned int) 260)
13000
                               ? 8 : UNITS_PER_WORD);
13001
 
13002
      /* In case we don't know anything about the alignment, default to
13003
         the library version, since it is usually equally fast and results in
13004
         shorter code.
13005
 
13006
         Also emit a call when we know that the count is large and call overhead
13007
         will not be important.  */
13008
      if (!TARGET_INLINE_ALL_STRINGOPS
13009
          && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
13010
        return 0;
13011
 
13012
      if (TARGET_SINGLE_STRINGOP)
13013
        emit_insn (gen_cld ());
13014
 
13015
      countreg2 = gen_reg_rtx (Pmode);
13016
      countreg = copy_to_mode_reg (counter_mode, count_exp);
13017
      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
13018
      /* Get rid of MEM_OFFSET, it won't be accurate.  */
13019
      dst = change_address (dst, BLKmode, destreg);
13020
 
13021
      if (count == 0 && align < desired_alignment)
13022
        {
13023
          label = gen_label_rtx ();
13024
          emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13025
                                   LEU, 0, counter_mode, 1, label);
13026
        }
13027
      if (align <= 1)
13028
        {
13029
          rtx label = ix86_expand_aligntest (destreg, 1);
13030
          emit_insn (gen_strset (destreg, dst,
13031
                                 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13032
          ix86_adjust_counter (countreg, 1);
13033
          emit_label (label);
13034
          LABEL_NUSES (label) = 1;
13035
        }
13036
      if (align <= 2)
13037
        {
13038
          rtx label = ix86_expand_aligntest (destreg, 2);
13039
          emit_insn (gen_strset (destreg, dst,
13040
                                 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13041
          ix86_adjust_counter (countreg, 2);
13042
          emit_label (label);
13043
          LABEL_NUSES (label) = 1;
13044
        }
13045
      if (align <= 4 && desired_alignment > 4)
13046
        {
13047
          rtx label = ix86_expand_aligntest (destreg, 4);
13048
          emit_insn (gen_strset (destreg, dst,
13049
                                 (TARGET_64BIT
13050
                                  ? gen_rtx_SUBREG (SImode, zeroreg, 0)
13051
                                  : zeroreg)));
13052
          ix86_adjust_counter (countreg, 4);
13053
          emit_label (label);
13054
          LABEL_NUSES (label) = 1;
13055
        }
13056
 
13057
      if (label && desired_alignment > 4 && !TARGET_64BIT)
13058
        {
13059
          emit_label (label);
13060
          LABEL_NUSES (label) = 1;
13061
          label = NULL_RTX;
13062
        }
13063
 
13064
      if (!TARGET_SINGLE_STRINGOP)
13065
        emit_insn (gen_cld ());
13066
      if (TARGET_64BIT)
13067
        {
13068
          emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13069
                                  GEN_INT (3)));
13070
          destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13071
        }
13072
      else
13073
        {
13074
          emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13075
          destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13076
        }
13077
      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13078
      emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
13079
 
13080
      if (label)
13081
        {
13082
          emit_label (label);
13083
          LABEL_NUSES (label) = 1;
13084
        }
13085
 
13086
      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13087
        emit_insn (gen_strset (destreg, dst,
13088
                               gen_rtx_SUBREG (SImode, zeroreg, 0)));
13089
      if (TARGET_64BIT && (align <= 4 || count == 0))
13090
        {
13091
          rtx label = ix86_expand_aligntest (countreg, 4);
13092
          emit_insn (gen_strset (destreg, dst,
13093
                                 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13094
          emit_label (label);
13095
          LABEL_NUSES (label) = 1;
13096
        }
13097
      if (align > 2 && count != 0 && (count & 2))
13098
        emit_insn (gen_strset (destreg, dst,
13099
                               gen_rtx_SUBREG (HImode, zeroreg, 0)));
13100
      if (align <= 2 || count == 0)
13101
        {
13102
          rtx label = ix86_expand_aligntest (countreg, 2);
13103
          emit_insn (gen_strset (destreg, dst,
13104
                                 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13105
          emit_label (label);
13106
          LABEL_NUSES (label) = 1;
13107
        }
13108
      if (align > 1 && count != 0 && (count & 1))
13109
        emit_insn (gen_strset (destreg, dst,
13110
                               gen_rtx_SUBREG (QImode, zeroreg, 0)));
13111
      if (align <= 1 || count == 0)
13112
        {
13113
          rtx label = ix86_expand_aligntest (countreg, 1);
13114
          emit_insn (gen_strset (destreg, dst,
13115
                                 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13116
          emit_label (label);
13117
          LABEL_NUSES (label) = 1;
13118
        }
13119
    }
13120
  return 1;
13121
}
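/* Illustrative sketch, not part of GCC: the store pattern the expander
   above produces for a compile-time COUNT, written as plain C.  The
   function and variable names here are hypothetical; SIZE plays the
   role of the word size (4, or 8 with a DImode zero register).  */
#if 0
static void
example_clear_by_pieces (unsigned char *dst, unsigned long count, int size)
{
  unsigned long offset;

  /* Word stores cover count & ~(size - 1) bytes; the expander emits
     them either unrolled (repcount <= max_nonrep) or as rep stos.  */
  for (offset = 0; offset < (count & ~((unsigned long) size - 1));
       offset += size)
    __builtin_memset (dst + offset, 0, size);

  /* Tail stores mirror the (count & 4), (count & 2) and (count & 1)
     tests above; the casts stand in for SImode/HImode/QImode strset.  */
  if (size == 8 && (count & 4))
    {
      *(unsigned int *) (dst + offset) = 0;
      offset += 4;
    }
  if (count & 2)
    {
      *(unsigned short *) (dst + offset) = 0;
      offset += 2;
    }
  if (count & 1)
    dst[offset] = 0;
}
#endif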
13122
 
13123
/* Expand strlen.  */
13124
int
13125
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
13126
{
13127
  rtx addr, scratch1, scratch2, scratch3, scratch4;
13128
 
13129
  /* The generic case of the strlen expander is long.  Avoid its
13130
     expansion unless TARGET_INLINE_ALL_STRINGOPS.  */
13131
 
13132
  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13133
      && !TARGET_INLINE_ALL_STRINGOPS
13134
      && !optimize_size
13135
      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
13136
    return 0;
13137
 
13138
  addr = force_reg (Pmode, XEXP (src, 0));
13139
  scratch1 = gen_reg_rtx (Pmode);
13140
 
13141
  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13142
      && !optimize_size)
13143
    {
13144
      /* It seems that some optimizers do not combine a call like
13145
         foo(strlen(bar), strlen(bar));
13146
         when the move and the subtraction are done here.  They do
13147
         compute the length just once when these instructions are done
13148
         inside output_strlen_unroll().  But since &bar[strlen(bar)] is
13149
         often used and this uses one fewer register for the lifetime of
13150
         output_strlen_unroll(), it is better.  */
13151
 
13152
      emit_move_insn (out, addr);
13153
 
13154
      ix86_expand_strlensi_unroll_1 (out, src, align);
13155
 
13156
      /* strlensi_unroll_1 returns the address of the zero at the end of
13157
         the string, like memchr(), so compute the length by subtracting
13158
         the start address.  */
13159
      if (TARGET_64BIT)
13160
        emit_insn (gen_subdi3 (out, out, addr));
13161
      else
13162
        emit_insn (gen_subsi3 (out, out, addr));
13163
    }
13164
  else
13165
    {
13166
      rtx unspec;
13167
      scratch2 = gen_reg_rtx (Pmode);
13168
      scratch3 = gen_reg_rtx (Pmode);
13169
      scratch4 = force_reg (Pmode, constm1_rtx);
13170
 
13171
      emit_move_insn (scratch3, addr);
13172
      eoschar = force_reg (QImode, eoschar);
13173
 
13174
      emit_insn (gen_cld ());
13175
      src = replace_equiv_address_nv (src, scratch3);
13176
 
13177
      /* If .md starts supporting :P, this can be done in .md.  */
13178
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
13179
                                                 scratch4), UNSPEC_SCAS);
13180
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
13181
      if (TARGET_64BIT)
13182
        {
13183
          emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
13184
          emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
13185
        }
13186
      else
13187
        {
13188
          emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
13189
          emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
13190
        }
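      /* Worked example (illustrative): scas enters with the count
         register at -1 and decrements it once per byte scanned,
         including the terminator, leaving -(strlen + 2).  The
         one_cmpl/add pair above therefore computes
         ~(-(strlen + 2)) - 1 = strlen; e.g. for "abc": -1 - 4 = -5,
         ~(-5) = 4, 4 - 1 = 3.  */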
13191
    }
13192
  return 1;
13193
}
13194
 
13195
/* Expand the appropriate insns for doing strlen if not just doing
13196
   repnz; scasb
13197
 
13198
   out = result, initialized with the start address
13199
   align_rtx = alignment of the address.
13200
   scratch = scratch register, initialized with the start address when
13201
        not aligned, otherwise undefined
13202
 
13203
   This is just the body. It needs the initializations mentioned above and
13204
   some address computing at the end.  These things are done in i386.md.  */
13205
 
13206
static void
13207
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
13208
{
13209
  int align;
13210
  rtx tmp;
13211
  rtx align_2_label = NULL_RTX;
13212
  rtx align_3_label = NULL_RTX;
13213
  rtx align_4_label = gen_label_rtx ();
13214
  rtx end_0_label = gen_label_rtx ();
13215
  rtx mem;
13216
  rtx tmpreg = gen_reg_rtx (SImode);
13217
  rtx scratch = gen_reg_rtx (SImode);
13218
  rtx cmp;
13219
 
13220
  align = 0;
13221
  if (GET_CODE (align_rtx) == CONST_INT)
13222
    align = INTVAL (align_rtx);
13223
 
13224
  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
13225
 
13226
  /* Is there a known alignment and is it less than 4?  */
13227
  if (align < 4)
13228
    {
13229
      rtx scratch1 = gen_reg_rtx (Pmode);
13230
      emit_move_insn (scratch1, out);
13231
      /* Is there a known alignment and is it not 2? */
13232
      if (align != 2)
13233
        {
13234
          align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
13235
          align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
13236
 
13237
          /* Leave just the 3 lower bits.  */
13238
          align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
13239
                                    NULL_RTX, 0, OPTAB_WIDEN);
13240
 
13241
          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13242
                                   Pmode, 1, align_4_label);
13243
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
13244
                                   Pmode, 1, align_2_label);
13245
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
13246
                                   Pmode, 1, align_3_label);
13247
        }
13248
      else
13249
        {
13250
          /* Since the alignment is 2, we have to check 2 or 0 bytes;
13251
             check whether it is aligned to a 4-byte boundary.  */
13252
 
13253
          align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
13254
                                    NULL_RTX, 0, OPTAB_WIDEN);
13255
 
13256
          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13257
                                   Pmode, 1, align_4_label);
13258
        }
13259
 
13260
      mem = change_address (src, QImode, out);
13261
 
13262
      /* Now compare the bytes.  */
13263
 
13264
      /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
13265
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13266
                               QImode, 1, end_0_label);
13267
 
13268
      /* Increment the address.  */
13269
      if (TARGET_64BIT)
13270
        emit_insn (gen_adddi3 (out, out, const1_rtx));
13271
      else
13272
        emit_insn (gen_addsi3 (out, out, const1_rtx));
13273
 
13274
      /* Not needed with an alignment of 2.  */
13275
      if (align != 2)
13276
        {
13277
          emit_label (align_2_label);
13278
 
13279
          emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13280
                                   end_0_label);
13281
 
13282
          if (TARGET_64BIT)
13283
            emit_insn (gen_adddi3 (out, out, const1_rtx));
13284
          else
13285
            emit_insn (gen_addsi3 (out, out, const1_rtx));
13286
 
13287
          emit_label (align_3_label);
13288
        }
13289
 
13290
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13291
                               end_0_label);
13292
 
13293
      if (TARGET_64BIT)
13294
        emit_insn (gen_adddi3 (out, out, const1_rtx));
13295
      else
13296
        emit_insn (gen_addsi3 (out, out, const1_rtx));
13297
    }
13298
 
13299
  /* Generate a loop to check 4 bytes at a time.  Aligning this loop is
13300
     not worthwhile: it only enlarges the program and does not make it
13301
     faster.  */
13302
  emit_label (align_4_label);
13303
 
13304
  mem = change_address (src, SImode, out);
13305
  emit_move_insn (scratch, mem);
13306
  if (TARGET_64BIT)
13307
    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13308
  else
13309
    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13310
 
13311
  /* This formula yields a nonzero result iff one of the bytes is zero.
13312
     This saves three branches inside the loop and many cycles.  */
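  /* Concretely, the insns below compute
       tmpreg = (scratch - 0x01010101) & ~scratch & 0x80808080,
     which is nonzero exactly when some byte of SCRATCH is zero: a zero
     byte borrows in the subtraction, turning its 0x80 bit on, while
     the ~scratch term masks off bytes that merely had their top bit
     set.  Illustrative check: scratch = 0x41420043 gives
     0x4040FF42 & 0xBEBDFFBC & 0x80808080 = 0x00008000.  */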
13313
 
13314
  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13315
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
13316
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13317
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
13318
                         gen_int_mode (0x80808080, SImode)));
13319
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13320
                           align_4_label);
13321
 
13322
  if (TARGET_CMOVE)
13323
    {
13324
       rtx reg = gen_reg_rtx (SImode);
13325
       rtx reg2 = gen_reg_rtx (Pmode);
13326
       emit_move_insn (reg, tmpreg);
13327
       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13328
 
13329
       /* If zero is not in the first two bytes, move two bytes forward.  */
13330
       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13331
       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13332
       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13333
       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13334
                               gen_rtx_IF_THEN_ELSE (SImode, tmp,
13335
                                                     reg,
13336
                                                     tmpreg)));
13337
       /* Emit lea manually to avoid clobbering of flags.  */
13338
       emit_insn (gen_rtx_SET (SImode, reg2,
13339
                               gen_rtx_PLUS (Pmode, out, const2_rtx)));
13340
 
13341
       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13342
       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13343
       emit_insn (gen_rtx_SET (VOIDmode, out,
13344
                               gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13345
                                                     reg2,
13346
                                                     out)));
13347
 
13348
    }
13349
  else
13350
    {
13351
       rtx end_2_label = gen_label_rtx ();
13352
       /* Is zero in the first two bytes? */
13353
 
13354
       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13355
       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13356
       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13357
       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13358
                            gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13359
                            pc_rtx);
13360
       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13361
       JUMP_LABEL (tmp) = end_2_label;
13362
 
13363
       /* Not in the first two.  Move two bytes forward.  */
13364
       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13365
       if (TARGET_64BIT)
13366
         emit_insn (gen_adddi3 (out, out, const2_rtx));
13367
       else
13368
         emit_insn (gen_addsi3 (out, out, const2_rtx));
13369
 
13370
       emit_label (end_2_label);
13371
 
13372
    }
13373
 
13374
  /* Avoid branch in fixing the byte.  */
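  /* How the fixup works (illustrative walk-through): OUT is now 4 (or
     6, after the two-byte step above) past the word that held the
     zero, and bit 7 of the low byte of TMPREG is set iff the zero is
     the earlier byte of the remaining pair.  Adding the byte to itself
     moves that bit into the carry flag, so the subtract-with-borrow
     below computes OUT - 3 - CF, landing OUT on the zero byte in all
     four cases.  */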
13375
  tmpreg = gen_lowpart (QImode, tmpreg);
13376
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13377
  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13378
  if (TARGET_64BIT)
13379
    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13380
  else
13381
    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13382
 
13383
  emit_label (end_0_label);
13384
}
13385
 
13386
void
13387
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13388
                  rtx callarg2 ATTRIBUTE_UNUSED,
13389
                  rtx pop, int sibcall)
13390
{
13391
  rtx use = NULL, call;
13392
 
13393
  if (pop == const0_rtx)
13394
    pop = NULL;
13395
  gcc_assert (!TARGET_64BIT || !pop);
13396
 
13397
  if (TARGET_MACHO && !TARGET_64BIT)
13398
    {
13399
#if TARGET_MACHO
13400
      if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13401
        fnaddr = machopic_indirect_call_target (fnaddr);
13402
#endif
13403
    }
13404
  else
13405
    {
13406
      /* Static functions and indirect calls don't need the pic register.  */
13407
      if (! TARGET_64BIT && flag_pic
13408
          && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13409
          && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13410
        use_reg (&use, pic_offset_table_rtx);
13411
    }
13412
 
13413
  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13414
    {
13415
      rtx al = gen_rtx_REG (QImode, 0);
13416
      emit_move_insn (al, callarg2);
13417
      use_reg (&use, al);
13418
    }
13419
 
13420
  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13421
    {
13422
      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13423
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
13424
    }
13425
  if (sibcall && TARGET_64BIT
13426
      && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13427
    {
13428
      rtx addr;
13429
      addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13430
      fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13431
      emit_move_insn (fnaddr, addr);
13432
      fnaddr = gen_rtx_MEM (QImode, fnaddr);
13433
    }
13434
 
13435
  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13436
  if (retval)
13437
    call = gen_rtx_SET (VOIDmode, retval, call);
13438
  if (pop)
13439
    {
13440
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13441
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13442
      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13443
    }
13444
 
13445
  call = emit_call_insn (call);
13446
  if (use)
13447
    CALL_INSN_FUNCTION_USAGE (call) = use;
13448
}
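/* Illustrative example: for a stdcall callee that pops 12 bytes of
   arguments, POP arrives as (const_int 12) and the PARALLEL built
   above pairs the call with
     (set (reg sp) (plus (reg sp) (const_int 12)))
   to match the "ret $12" the callee executes.  */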
13449
 
13450
 
13451
/* Clear stack slot assignments remembered from previous functions.
13452
   This is called from INIT_EXPANDERS once before RTL is emitted for each
13453
   function.  */
13454
 
13455
static struct machine_function *
13456
ix86_init_machine_status (void)
13457
{
13458
  struct machine_function *f;
13459
 
13460
  f = ggc_alloc_cleared (sizeof (struct machine_function));
13461
  f->use_fast_prologue_epilogue_nregs = -1;
13462
  f->tls_descriptor_call_expanded_p = 0;
13463
 
13464
  return f;
13465
}
13466
 
13467
/* Return a MEM corresponding to a stack slot with mode MODE.
13468
   Allocate a new slot if necessary.
13469
 
13470
   The RTL for a function can have several slots available: N is
13471
   which slot to use.  */
13472
 
13473
rtx
13474
assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13475
{
13476
  struct stack_local_entry *s;
13477
 
13478
  gcc_assert (n < MAX_386_STACK_LOCALS);
13479
 
13480
  /* Virtual slot is valid only before vregs are instantiated.  */
13481
  gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
13482
 
13483
  for (s = ix86_stack_locals; s; s = s->next)
13484
    if (s->mode == mode && s->n == n)
13485
      return s->rtl;
13486
 
13487
  s = (struct stack_local_entry *)
13488
    ggc_alloc (sizeof (struct stack_local_entry));
13489
  s->n = n;
13490
  s->mode = mode;
13491
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13492
 
13493
  s->next = ix86_stack_locals;
13494
  ix86_stack_locals = s;
13495
  return s->rtl;
13496
}
13497
 
13498
/* Construct the SYMBOL_REF for the tls_get_addr function.  */
13499
 
13500
static GTY(()) rtx ix86_tls_symbol;
13501
rtx
13502
ix86_tls_get_addr (void)
13503
{
13504
 
13505
  if (!ix86_tls_symbol)
13506
    {
13507
      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13508
                                            (TARGET_ANY_GNU_TLS
13509
                                             && !TARGET_64BIT)
13510
                                            ? "___tls_get_addr"
13511
                                            : "__tls_get_addr");
13512
    }
13513
 
13514
  return ix86_tls_symbol;
13515
}
13516
 
13517
/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */
13518
 
13519
static GTY(()) rtx ix86_tls_module_base_symbol;
13520
rtx
13521
ix86_tls_module_base (void)
13522
{
13523
 
13524
  if (!ix86_tls_module_base_symbol)
13525
    {
13526
      ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13527
                                                        "_TLS_MODULE_BASE_");
13528
      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13529
        |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13530
    }
13531
 
13532
  return ix86_tls_module_base_symbol;
13533
}
13534
 
13535
/* Calculate the length of the memory address in the instruction
13536
   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
13537
 
13538
int
13539
memory_address_length (rtx addr)
13540
{
13541
  struct ix86_address parts;
13542
  rtx base, index, disp;
13543
  int len;
13544
  int ok;
13545
 
13546
  if (GET_CODE (addr) == PRE_DEC
13547
      || GET_CODE (addr) == POST_INC
13548
      || GET_CODE (addr) == PRE_MODIFY
13549
      || GET_CODE (addr) == POST_MODIFY)
13550
    return 0;
13551
 
13552
  ok = ix86_decompose_address (addr, &parts);
13553
  gcc_assert (ok);
13554
 
13555
  if (parts.base && GET_CODE (parts.base) == SUBREG)
13556
    parts.base = SUBREG_REG (parts.base);
13557
  if (parts.index && GET_CODE (parts.index) == SUBREG)
13558
    parts.index = SUBREG_REG (parts.index);
13559
 
13560
  base = parts.base;
13561
  index = parts.index;
13562
  disp = parts.disp;
13563
  len = 0;
13564
 
13565
  /* Rule of thumb:
13566
       - esp as the base always wants an index,
13567
       - ebp as the base always wants a displacement.  */
13568
 
13569
  /* Register Indirect.  */
13570
  if (base && !index && !disp)
13571
    {
13572
      /* esp (for its index) and ebp (for its displacement) need
13573
         the two-byte modrm form.  */
13574
      if (addr == stack_pointer_rtx
13575
          || addr == arg_pointer_rtx
13576
          || addr == frame_pointer_rtx
13577
          || addr == hard_frame_pointer_rtx)
13578
        len = 1;
13579
    }
13580
 
13581
  /* Direct Addressing.  */
13582
  else if (disp && !base && !index)
13583
    len = 4;
13584
 
13585
  else
13586
    {
13587
      /* Find the length of the displacement constant.  */
13588
      if (disp)
13589
        {
13590
          if (base && satisfies_constraint_K (disp))
13591
            len = 1;
13592
          else
13593
            len = 4;
13594
        }
13595
      /* ebp always wants a displacement.  */
13596
      else if (base == hard_frame_pointer_rtx)
13597
        len = 1;
13598
 
13599
      /* An index requires the two-byte modrm form....  */
13600
      if (index
13601
          /* ...like esp, which always wants an index.  */
13602
          || base == stack_pointer_rtx
13603
          || base == arg_pointer_rtx
13604
          || base == frame_pointer_rtx)
13605
        len += 1;
13606
    }
13607
 
13608
  return len;
13609
}
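/* Illustrative lengths as computed above:
     (%eax)          -> 0   modrm only
     (%esp)          -> 1   SIB byte
     8(%ebp)         -> 1   disp8
     foo             -> 4   disp32
     foo(%eax)       -> 4   disp32
     8(%eax,%ebx,4)  -> 2   disp8 + SIB  */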
13610
 
13611
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
13612
   is set, expect that the insn has an 8-bit immediate alternative.  */
13613
int
13614
ix86_attr_length_immediate_default (rtx insn, int shortform)
13615
{
13616
  int len = 0;
13617
  int i;
13618
  extract_insn_cached (insn);
13619
  for (i = recog_data.n_operands - 1; i >= 0; --i)
13620
    if (CONSTANT_P (recog_data.operand[i]))
13621
      {
13622
        gcc_assert (!len);
13623
        if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13624
          len = 1;
13625
        else
13626
          {
13627
            switch (get_attr_mode (insn))
13628
              {
13629
                case MODE_QI:
13630
                  len += 1;
13631
                  break;
13632
                case MODE_HI:
13633
                  len += 2;
13634
                  break;
13635
                case MODE_SI:
13636
                  len += 4;
13637
                  break;
13638
                /* Immediates for DImode instructions are encoded as
                   32-bit sign-extended values.  */
13639
                case MODE_DI:
13640
                  len += 4;
13641
                  break;
13642
                default:
13643
                  fatal_insn ("unknown insn mode", insn);
13644
              }
13645
          }
13646
      }
13647
  return len;
13648
}
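/* Illustrative example: "addl $1000, %eax" in SImode counts a 4-byte
   immediate, while with SHORTFORM set "addl $4, %eax" counts only 1
   byte, since immediates in -128..127 satisfy constraint K and use
   the sign-extended 8-bit encoding.  */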
13649
/* Compute default value for "length_address" attribute.  */
13650
int
13651
ix86_attr_length_address_default (rtx insn)
13652
{
13653
  int i;
13654
 
13655
  if (get_attr_type (insn) == TYPE_LEA)
13656
    {
13657
      rtx set = PATTERN (insn);
13658
 
13659
      if (GET_CODE (set) == PARALLEL)
13660
        set = XVECEXP (set, 0, 0);
13661
 
13662
      gcc_assert (GET_CODE (set) == SET);
13663
 
13664
      return memory_address_length (SET_SRC (set));
13665
    }
13666
 
13667
  extract_insn_cached (insn);
13668
  for (i = recog_data.n_operands - 1; i >= 0; --i)
13669
    if (GET_CODE (recog_data.operand[i]) == MEM)
13670
      {
13671
        return memory_address_length (XEXP (recog_data.operand[i], 0));
13673
      }
13674
  return 0;
13675
}
13676
 
13677
/* Return the maximum number of instructions a cpu can issue.  */
13678
 
13679
static int
13680
ix86_issue_rate (void)
13681
{
13682
  switch (ix86_tune)
13683
    {
13684
    case PROCESSOR_PENTIUM:
13685
    case PROCESSOR_K6:
13686
      return 2;
13687
 
13688
    case PROCESSOR_PENTIUMPRO:
13689
    case PROCESSOR_PENTIUM4:
13690
    case PROCESSOR_ATHLON:
13691
    case PROCESSOR_K8:
13692
    case PROCESSOR_NOCONA:
13693
    case PROCESSOR_GENERIC32:
13694
    case PROCESSOR_GENERIC64:
13695
      return 3;
13696
 
13697
    default:
13698
      return 1;
13699
    }
13700
}
13701
 
13702
/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
13703
   by DEP_INSN and nothing else set by DEP_INSN.  */
13704
 
13705
static int
13706
ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13707
{
13708
  rtx set, set2;
13709
 
13710
  /* Simplify the test for uninteresting insns.  */
13711
  if (insn_type != TYPE_SETCC
13712
      && insn_type != TYPE_ICMOV
13713
      && insn_type != TYPE_FCMOV
13714
      && insn_type != TYPE_IBR)
13715
    return 0;
13716
 
13717
  if ((set = single_set (dep_insn)) != 0)
13718
    {
13719
      set = SET_DEST (set);
13720
      set2 = NULL_RTX;
13721
    }
13722
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
13723
           && XVECLEN (PATTERN (dep_insn), 0) == 2
13724
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
13725
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
13726
    {
13727
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13728
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
13729
    }
13730
  else
13731
    return 0;
13732
 
13733
  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
13734
    return 0;
13735
 
13736
  /* This test is true if the dependent insn reads the flags but
13737
     not any other potentially set register.  */
13738
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
13739
    return 0;
13740
 
13741
  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
13742
    return 0;
13743
 
13744
  return 1;
13745
}
13746
 
13747
/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13748
   address with operands set by DEP_INSN.  */
13749
 
13750
static int
13751
ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13752
{
13753
  rtx addr;
13754
 
13755
  if (insn_type == TYPE_LEA
13756
      && TARGET_PENTIUM)
13757
    {
13758
      addr = PATTERN (insn);
13759
 
13760
      if (GET_CODE (addr) == PARALLEL)
13761
        addr = XVECEXP (addr, 0, 0);
13762
 
13763
      gcc_assert (GET_CODE (addr) == SET);
13764
 
13765
      addr = SET_SRC (addr);
13766
    }
13767
  else
13768
    {
13769
      int i;
13770
      extract_insn_cached (insn);
13771
      for (i = recog_data.n_operands - 1; i >= 0; --i)
13772
        if (GET_CODE (recog_data.operand[i]) == MEM)
13773
          {
13774
            addr = XEXP (recog_data.operand[i], 0);
13775
            goto found;
13776
          }
13777
      return 0;
13778
    found:;
13779
    }
13780
 
13781
  return modified_in_p (addr, dep_insn);
13782
}
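/* Illustrative example of the AGI this detects on Pentium:
       addl $4, %ebx
       movl (%ebx), %eax
   The load's address depends on the immediately preceding write to
   %ebx, which costs an extra cycle there.  */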
13783
 
13784
static int
13785
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
13786
{
13787
  enum attr_type insn_type, dep_insn_type;
13788
  enum attr_memory memory;
13789
  rtx set, set2;
13790
  int dep_insn_code_number;
13791
 
13792
  /* Anti and output dependencies have zero cost on all CPUs.  */
13793
  if (REG_NOTE_KIND (link) != 0)
13794
    return 0;
13795
 
13796
  dep_insn_code_number = recog_memoized (dep_insn);
13797
 
13798
  /* If we can't recognize the insns, we can't really do anything.  */
13799
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
13800
    return cost;
13801
 
13802
  insn_type = get_attr_type (insn);
13803
  dep_insn_type = get_attr_type (dep_insn);
13804
 
13805
  switch (ix86_tune)
13806
    {
13807
    case PROCESSOR_PENTIUM:
13808
      /* Address Generation Interlock adds a cycle of latency.  */
13809
      if (ix86_agi_dependent (insn, dep_insn, insn_type))
13810
        cost += 1;
13811
 
13812
      /* ??? Compares pair with jump/setcc.  */
13813
      if (ix86_flags_dependent (insn, dep_insn, insn_type))
13814
        cost = 0;
13815
 
13816
      /* Floating point stores require value to be ready one cycle earlier.  */
13817
      if (insn_type == TYPE_FMOV
13818
          && get_attr_memory (insn) == MEMORY_STORE
13819
          && !ix86_agi_dependent (insn, dep_insn, insn_type))
13820
        cost += 1;
13821
      break;
13822
 
13823
    case PROCESSOR_PENTIUMPRO:
13824
      memory = get_attr_memory (insn);
13825
 
13826
      /* INT->FP conversion is expensive.  */
13827
      if (get_attr_fp_int_src (dep_insn))
13828
        cost += 5;
13829
 
13830
      /* There is one cycle extra latency between an FP op and a store.  */
13831
      if (insn_type == TYPE_FMOV
13832
          && (set = single_set (dep_insn)) != NULL_RTX
13833
          && (set2 = single_set (insn)) != NULL_RTX
13834
          && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
13835
          && GET_CODE (SET_DEST (set2)) == MEM)
13836
        cost += 1;
13837
 
13838
      /* Account for the reorder buffer's ability to hide load latency
13839
         by executing the load in parallel with the previous instruction
13840
         when that instruction is not needed to compute the address.  */
13841
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13842
          && !ix86_agi_dependent (insn, dep_insn, insn_type))
13843
        {
13844
          /* Claim that moves take one cycle, as the core can issue one
13845
             load at a time and the next load can start a cycle later.  */
13846
          if (dep_insn_type == TYPE_IMOV
13847
              || dep_insn_type == TYPE_FMOV)
13848
            cost = 1;
13849
          else if (cost > 1)
13850
            cost--;
13851
        }
13852
      break;
13853
 
13854
    case PROCESSOR_K6:
13855
      memory = get_attr_memory (insn);
13856
 
13857
      /* The esp dependency is resolved before the instruction is really
13858
         finished.  */
13859
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
13860
          && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
13861
        return 1;
13862
 
13863
      /* INT->FP conversion is expensive.  */
13864
      if (get_attr_fp_int_src (dep_insn))
13865
        cost += 5;
13866
 
13867
      /* Account for the reorder buffer's ability to hide load latency
13868
         by executing the load in parallel with the previous instruction
13869
         when that instruction is not needed to compute the address.  */
13870
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13871
          && !ix86_agi_dependent (insn, dep_insn, insn_type))
13872
        {
13873
          /* Claim that moves take one cycle, as the core can issue one
13874
             load at a time and the next load can start a cycle later.  */
13875
          if (dep_insn_type == TYPE_IMOV
13876
              || dep_insn_type == TYPE_FMOV)
13877
            cost = 1;
13878
          else if (cost > 2)
13879
            cost -= 2;
13880
          else
13881
            cost = 1;
13882
        }
13883
      break;
13884
 
13885
    case PROCESSOR_ATHLON:
13886
    case PROCESSOR_K8:
13887
    case PROCESSOR_GENERIC32:
13888
    case PROCESSOR_GENERIC64:
13889
      memory = get_attr_memory (insn);
13890
 
13891
      /* Account for the reorder buffer's ability to hide load latency
13892
         by executing the load in parallel with the previous instruction
13893
         when that instruction is not needed to compute the address.  */
13894
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13895
          && !ix86_agi_dependent (insn, dep_insn, insn_type))
13896
        {
13897
          enum attr_unit unit = get_attr_unit (insn);
13898
          int loadcost = 3;
13899
 
13900
          /* Because of the difference between the length of integer and
13901
             floating unit pipeline preparation stages, the memory operands
13902
             for floating point are cheaper.
13903
 
13904
             ??? For Athlon the difference is most probably 2.  */
13905
          if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
13906
            loadcost = 3;
13907
          else
13908
            loadcost = TARGET_ATHLON ? 2 : 0;
13909
 
13910
          if (cost >= loadcost)
13911
            cost -= loadcost;
13912
          else
13913
            cost = 0;
13914
        }
      break;
13915
 
13916
    default:
13917
      break;
13918
    }
13919
 
13920
  return cost;
13921
}
13922
 
13923
/* How many alternative schedules to try.  This should be as wide as the
13924
   scheduling freedom in the DFA, but no wider.  Making this value too
13925
   large results in extra work for the scheduler.  */
13926
 
13927
static int
13928
ia32_multipass_dfa_lookahead (void)
13929
{
13930
  if (ix86_tune == PROCESSOR_PENTIUM)
13931
    return 2;
13932
 
13933
  if (ix86_tune == PROCESSOR_PENTIUMPRO
13934
      || ix86_tune == PROCESSOR_K6)
13935
    return 1;
13936
 
13937
  else
13938
    return 0;
13939
}
13940
 
13941
 
13942
/* Compute the alignment given to a constant that is being placed in memory.
13943
   EXP is the constant and ALIGN is the alignment that the object would
13944
   ordinarily have.
13945
   The value of this function is used instead of that alignment to align
13946
   the object.  */
13947
 
13948
int
13949
ix86_constant_alignment (tree exp, int align)
13950
{
13951
  if (TREE_CODE (exp) == REAL_CST)
13952
    {
13953
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
13954
        return 64;
13955
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
13956
        return 128;
13957
    }
13958
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
13959
           && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
13960
    return BITS_PER_WORD;
13961
 
13962
  return align;
13963
}
13964
 
13965
/* Compute the alignment for a static variable.
13966
   TYPE is the data type, and ALIGN is the alignment that
13967
   the object would ordinarily have.  The value of this function is used
13968
   instead of that alignment to align the object.  */
13969
 
13970
int
13971
ix86_data_alignment (tree type, int align)
13972
{
13973
  int max_align = optimize_size ? BITS_PER_WORD : 256;
13974
 
13975
  if (AGGREGATE_TYPE_P (type)
13976
      && TYPE_SIZE (type)
13977
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13978
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
13979
          || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
13980
      && align < max_align)
13981
    align = max_align;
13982
 
13983
  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13984
      to a 16-byte boundary.  */
13985
  if (TARGET_64BIT)
13986
    {
13987
      if (AGGREGATE_TYPE_P (type)
13988
           && TYPE_SIZE (type)
13989
           && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13990
           && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
13991
               || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
13992
        return 128;
13993
    }
13994
 
13995
  if (TREE_CODE (type) == ARRAY_TYPE)
13996
    {
13997
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13998
        return 64;
13999
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14000
        return 128;
14001
    }
14002
  else if (TREE_CODE (type) == COMPLEX_TYPE)
14003
    {
14004
 
14005
      if (TYPE_MODE (type) == DCmode && align < 64)
14006
        return 64;
14007
      if (TYPE_MODE (type) == XCmode && align < 128)
14008
        return 128;
14009
    }
14010
  else if ((TREE_CODE (type) == RECORD_TYPE
14011
            || TREE_CODE (type) == UNION_TYPE
14012
            || TREE_CODE (type) == QUAL_UNION_TYPE)
14013
           && TYPE_FIELDS (type))
14014
    {
14015
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14016
        return 64;
14017
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14018
        return 128;
14019
    }
14020
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14021
           || TREE_CODE (type) == INTEGER_TYPE)
14022
    {
14023
      if (TYPE_MODE (type) == DFmode && align < 64)
14024
        return 64;
14025
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14026
        return 128;
14027
    }
14028
 
14029
  return align;
14030
}
14031
 
14032
/* Compute the alignment for a local variable.
14033
   TYPE is the data type, and ALIGN is the alignment that
14034
   the object would ordinarily have.  The value of this macro is used
14035
   instead of that alignment to align the object.  */
14036
 
14037
int
14038
ix86_local_alignment (tree type, int align)
14039
{
14040
  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14041
      to a 16-byte boundary.  */
14042
  if (TARGET_64BIT)
14043
    {
14044
      if (AGGREGATE_TYPE_P (type)
14045
           && TYPE_SIZE (type)
14046
           && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14047
           && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
14048
               || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14049
        return 128;
14050
    }
14051
  if (TREE_CODE (type) == ARRAY_TYPE)
14052
    {
14053
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14054
        return 64;
14055
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14056
        return 128;
14057
    }
14058
  else if (TREE_CODE (type) == COMPLEX_TYPE)
14059
    {
14060
      if (TYPE_MODE (type) == DCmode && align < 64)
14061
        return 64;
14062
      if (TYPE_MODE (type) == XCmode && align < 128)
14063
        return 128;
14064
    }
14065
  else if ((TREE_CODE (type) == RECORD_TYPE
14066
            || TREE_CODE (type) == UNION_TYPE
14067
            || TREE_CODE (type) == QUAL_UNION_TYPE)
14068
           && TYPE_FIELDS (type))
14069
    {
14070
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14071
        return 64;
14072
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14073
        return 128;
14074
    }
14075
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14076
           || TREE_CODE (type) == INTEGER_TYPE)
14077
    {
14078
 
14079
      if (TYPE_MODE (type) == DFmode && align < 64)
14080
        return 64;
14081
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14082
        return 128;
14083
    }
14084
  return align;
14085
}
14086
 
14087
/* Emit RTL insns to initialize the variable parts of a trampoline.
14088
   FNADDR is an RTX for the address of the function's pure code.
14089
   CXT is an RTX for the static chain value for the function.  */
14090
void
14091
x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
14092
{
14093
  if (!TARGET_64BIT)
14094
    {
14095
      /* Compute offset from the end of the jmp to the target function.  */
14096
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
14097
                               plus_constant (tramp, 10),
14098
                               NULL_RTX, 1, OPTAB_DIRECT);
14099
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
14100
                      gen_int_mode (0xb9, QImode));
14101
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
14102
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
14103
                      gen_int_mode (0xe9, QImode));
14104
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
14105
    }
14106
  else
14107
    {
14108
      int offset = 0;
14109
      /* Try to load address using shorter movl instead of movabs.
14110
         We may want to support movq for kernel mode, but kernel does not use
14111
         trampolines at the moment.  */
14112
      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
14113
        {
14114
          fnaddr = copy_to_mode_reg (DImode, fnaddr);
14115
          emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14116
                          gen_int_mode (0xbb41, HImode));
14117
          emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
14118
                          gen_lowpart (SImode, fnaddr));
14119
          offset += 6;
14120
        }
14121
      else
14122
        {
14123
          emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14124
                          gen_int_mode (0xbb49, HImode));
14125
          emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14126
                          fnaddr);
14127
          offset += 10;
14128
        }
14129
      /* Load static chain using movabs to r10.  */
14130
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14131
                      gen_int_mode (0xba49, HImode));
14132
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14133
                      cxt);
14134
      offset += 10;
14135
      /* Jump to r11.  */
14136
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14137
                      gen_int_mode (0xff49, HImode));
14138
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
14139
                      gen_int_mode (0xe3, QImode));
14140
      offset += 3;
14141
      gcc_assert (offset <= TRAMPOLINE_SIZE);
14142
    }
14143
 
14144
#ifdef ENABLE_EXECUTE_STACK
14145
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
14146
                     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
14147
#endif
14148
}
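/* Byte for byte, the 32-bit trampoline emitted above is (illustrative
   disassembly):
       b9 <cxt:4>     movl  $CXT, %ecx
       e9 <disp:4>    jmp   FNADDR        (disp relative to TRAMP+10)
   and the 64-bit one (HImode stores are little-endian, so 0xbb49 lays
   down the bytes 49 bb):
       49 bb <imm64>  movabs $FNADDR, %r11   (or 41 bb <imm32>)
       49 ba <imm64>  movabs $CXT, %r10
       49 ff e3       jmp    *%r11  */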
14149
 
14150
/* Codes for all the SSE/MMX builtins.  */
14151
enum ix86_builtins
14152
{
14153
  IX86_BUILTIN_ADDPS,
14154
  IX86_BUILTIN_ADDSS,
14155
  IX86_BUILTIN_DIVPS,
14156
  IX86_BUILTIN_DIVSS,
14157
  IX86_BUILTIN_MULPS,
14158
  IX86_BUILTIN_MULSS,
14159
  IX86_BUILTIN_SUBPS,
14160
  IX86_BUILTIN_SUBSS,
14161
 
14162
  IX86_BUILTIN_CMPEQPS,
14163
  IX86_BUILTIN_CMPLTPS,
14164
  IX86_BUILTIN_CMPLEPS,
14165
  IX86_BUILTIN_CMPGTPS,
14166
  IX86_BUILTIN_CMPGEPS,
14167
  IX86_BUILTIN_CMPNEQPS,
14168
  IX86_BUILTIN_CMPNLTPS,
14169
  IX86_BUILTIN_CMPNLEPS,
14170
  IX86_BUILTIN_CMPNGTPS,
14171
  IX86_BUILTIN_CMPNGEPS,
14172
  IX86_BUILTIN_CMPORDPS,
14173
  IX86_BUILTIN_CMPUNORDPS,
14174
  IX86_BUILTIN_CMPEQSS,
14175
  IX86_BUILTIN_CMPLTSS,
14176
  IX86_BUILTIN_CMPLESS,
14177
  IX86_BUILTIN_CMPNEQSS,
14178
  IX86_BUILTIN_CMPNLTSS,
14179
  IX86_BUILTIN_CMPNLESS,
14180
  IX86_BUILTIN_CMPNGTSS,
14181
  IX86_BUILTIN_CMPNGESS,
14182
  IX86_BUILTIN_CMPORDSS,
14183
  IX86_BUILTIN_CMPUNORDSS,
14184
 
14185
  IX86_BUILTIN_COMIEQSS,
14186
  IX86_BUILTIN_COMILTSS,
14187
  IX86_BUILTIN_COMILESS,
14188
  IX86_BUILTIN_COMIGTSS,
14189
  IX86_BUILTIN_COMIGESS,
14190
  IX86_BUILTIN_COMINEQSS,
14191
  IX86_BUILTIN_UCOMIEQSS,
14192
  IX86_BUILTIN_UCOMILTSS,
14193
  IX86_BUILTIN_UCOMILESS,
14194
  IX86_BUILTIN_UCOMIGTSS,
14195
  IX86_BUILTIN_UCOMIGESS,
14196
  IX86_BUILTIN_UCOMINEQSS,
14197
 
14198
  IX86_BUILTIN_CVTPI2PS,
14199
  IX86_BUILTIN_CVTPS2PI,
14200
  IX86_BUILTIN_CVTSI2SS,
14201
  IX86_BUILTIN_CVTSI642SS,
14202
  IX86_BUILTIN_CVTSS2SI,
14203
  IX86_BUILTIN_CVTSS2SI64,
14204
  IX86_BUILTIN_CVTTPS2PI,
14205
  IX86_BUILTIN_CVTTSS2SI,
14206
  IX86_BUILTIN_CVTTSS2SI64,
14207
 
14208
  IX86_BUILTIN_MAXPS,
14209
  IX86_BUILTIN_MAXSS,
14210
  IX86_BUILTIN_MINPS,
14211
  IX86_BUILTIN_MINSS,
14212
 
14213
  IX86_BUILTIN_LOADUPS,
14214
  IX86_BUILTIN_STOREUPS,
14215
  IX86_BUILTIN_MOVSS,
14216
 
14217
  IX86_BUILTIN_MOVHLPS,
14218
  IX86_BUILTIN_MOVLHPS,
14219
  IX86_BUILTIN_LOADHPS,
14220
  IX86_BUILTIN_LOADLPS,
14221
  IX86_BUILTIN_STOREHPS,
14222
  IX86_BUILTIN_STORELPS,
14223
 
14224
  IX86_BUILTIN_MASKMOVQ,
14225
  IX86_BUILTIN_MOVMSKPS,
14226
  IX86_BUILTIN_PMOVMSKB,
14227
 
14228
  IX86_BUILTIN_MOVNTPS,
14229
  IX86_BUILTIN_MOVNTQ,
14230
 
14231
  IX86_BUILTIN_LOADDQU,
14232
  IX86_BUILTIN_STOREDQU,
14233
 
14234
  IX86_BUILTIN_PACKSSWB,
14235
  IX86_BUILTIN_PACKSSDW,
14236
  IX86_BUILTIN_PACKUSWB,
14237
 
14238
  IX86_BUILTIN_PADDB,
14239
  IX86_BUILTIN_PADDW,
14240
  IX86_BUILTIN_PADDD,
14241
  IX86_BUILTIN_PADDQ,
14242
  IX86_BUILTIN_PADDSB,
14243
  IX86_BUILTIN_PADDSW,
14244
  IX86_BUILTIN_PADDUSB,
14245
  IX86_BUILTIN_PADDUSW,
14246
  IX86_BUILTIN_PSUBB,
14247
  IX86_BUILTIN_PSUBW,
14248
  IX86_BUILTIN_PSUBD,
14249
  IX86_BUILTIN_PSUBQ,
14250
  IX86_BUILTIN_PSUBSB,
14251
  IX86_BUILTIN_PSUBSW,
14252
  IX86_BUILTIN_PSUBUSB,
14253
  IX86_BUILTIN_PSUBUSW,
14254
 
14255
  IX86_BUILTIN_PAND,
14256
  IX86_BUILTIN_PANDN,
14257
  IX86_BUILTIN_POR,
14258
  IX86_BUILTIN_PXOR,
14259
 
14260
  IX86_BUILTIN_PAVGB,
14261
  IX86_BUILTIN_PAVGW,
14262
 
14263
  IX86_BUILTIN_PCMPEQB,
14264
  IX86_BUILTIN_PCMPEQW,
14265
  IX86_BUILTIN_PCMPEQD,
14266
  IX86_BUILTIN_PCMPGTB,
14267
  IX86_BUILTIN_PCMPGTW,
14268
  IX86_BUILTIN_PCMPGTD,
14269
 
14270
  IX86_BUILTIN_PMADDWD,
14271
 
14272
  IX86_BUILTIN_PMAXSW,
14273
  IX86_BUILTIN_PMAXUB,
14274
  IX86_BUILTIN_PMINSW,
14275
  IX86_BUILTIN_PMINUB,
14276
 
14277
  IX86_BUILTIN_PMULHUW,
14278
  IX86_BUILTIN_PMULHW,
14279
  IX86_BUILTIN_PMULLW,
14280
 
14281
  IX86_BUILTIN_PSADBW,
14282
  IX86_BUILTIN_PSHUFW,
14283
 
14284
  IX86_BUILTIN_PSLLW,
14285
  IX86_BUILTIN_PSLLD,
14286
  IX86_BUILTIN_PSLLQ,
14287
  IX86_BUILTIN_PSRAW,
14288
  IX86_BUILTIN_PSRAD,
14289
  IX86_BUILTIN_PSRLW,
14290
  IX86_BUILTIN_PSRLD,
14291
  IX86_BUILTIN_PSRLQ,
14292
  IX86_BUILTIN_PSLLWI,
14293
  IX86_BUILTIN_PSLLDI,
14294
  IX86_BUILTIN_PSLLQI,
14295
  IX86_BUILTIN_PSRAWI,
14296
  IX86_BUILTIN_PSRADI,
14297
  IX86_BUILTIN_PSRLWI,
14298
  IX86_BUILTIN_PSRLDI,
14299
  IX86_BUILTIN_PSRLQI,
14300
 
14301
  IX86_BUILTIN_PUNPCKHBW,
14302
  IX86_BUILTIN_PUNPCKHWD,
14303
  IX86_BUILTIN_PUNPCKHDQ,
14304
  IX86_BUILTIN_PUNPCKLBW,
14305
  IX86_BUILTIN_PUNPCKLWD,
14306
  IX86_BUILTIN_PUNPCKLDQ,
14307
 
14308
  IX86_BUILTIN_SHUFPS,
14309
 
14310
  IX86_BUILTIN_RCPPS,
14311
  IX86_BUILTIN_RCPSS,
14312
  IX86_BUILTIN_RSQRTPS,
14313
  IX86_BUILTIN_RSQRTSS,
14314
  IX86_BUILTIN_SQRTPS,
14315
  IX86_BUILTIN_SQRTSS,
14316
 
14317
  IX86_BUILTIN_UNPCKHPS,
14318
  IX86_BUILTIN_UNPCKLPS,
14319
 
14320
  IX86_BUILTIN_ANDPS,
14321
  IX86_BUILTIN_ANDNPS,
14322
  IX86_BUILTIN_ORPS,
14323
  IX86_BUILTIN_XORPS,
14324
 
14325
  IX86_BUILTIN_EMMS,
14326
  IX86_BUILTIN_LDMXCSR,
14327
  IX86_BUILTIN_STMXCSR,
14328
  IX86_BUILTIN_SFENCE,
14329
 
14330
  /* 3DNow! Original */
14331
  IX86_BUILTIN_FEMMS,
14332
  IX86_BUILTIN_PAVGUSB,
14333
  IX86_BUILTIN_PF2ID,
14334
  IX86_BUILTIN_PFACC,
14335
  IX86_BUILTIN_PFADD,
14336
  IX86_BUILTIN_PFCMPEQ,
14337
  IX86_BUILTIN_PFCMPGE,
14338
  IX86_BUILTIN_PFCMPGT,
14339
  IX86_BUILTIN_PFMAX,
14340
  IX86_BUILTIN_PFMIN,
14341
  IX86_BUILTIN_PFMUL,
14342
  IX86_BUILTIN_PFRCP,
14343
  IX86_BUILTIN_PFRCPIT1,
14344
  IX86_BUILTIN_PFRCPIT2,
14345
  IX86_BUILTIN_PFRSQIT1,
14346
  IX86_BUILTIN_PFRSQRT,
14347
  IX86_BUILTIN_PFSUB,
14348
  IX86_BUILTIN_PFSUBR,
14349
  IX86_BUILTIN_PI2FD,
14350
  IX86_BUILTIN_PMULHRW,
14351
 
14352
  /* 3DNow! Athlon Extensions */
14353
  IX86_BUILTIN_PF2IW,
14354
  IX86_BUILTIN_PFNACC,
14355
  IX86_BUILTIN_PFPNACC,
14356
  IX86_BUILTIN_PI2FW,
14357
  IX86_BUILTIN_PSWAPDSI,
14358
  IX86_BUILTIN_PSWAPDSF,
14359
 
14360
  /* SSE2 */
14361
  IX86_BUILTIN_ADDPD,
14362
  IX86_BUILTIN_ADDSD,
14363
  IX86_BUILTIN_DIVPD,
14364
  IX86_BUILTIN_DIVSD,
14365
  IX86_BUILTIN_MULPD,
14366
  IX86_BUILTIN_MULSD,
14367
  IX86_BUILTIN_SUBPD,
14368
  IX86_BUILTIN_SUBSD,
14369
 
14370
  IX86_BUILTIN_CMPEQPD,
14371
  IX86_BUILTIN_CMPLTPD,
14372
  IX86_BUILTIN_CMPLEPD,
14373
  IX86_BUILTIN_CMPGTPD,
14374
  IX86_BUILTIN_CMPGEPD,
14375
  IX86_BUILTIN_CMPNEQPD,
14376
  IX86_BUILTIN_CMPNLTPD,
14377
  IX86_BUILTIN_CMPNLEPD,
14378
  IX86_BUILTIN_CMPNGTPD,
14379
  IX86_BUILTIN_CMPNGEPD,
14380
  IX86_BUILTIN_CMPORDPD,
14381
  IX86_BUILTIN_CMPUNORDPD,
14382
  IX86_BUILTIN_CMPNEPD,
14383
  IX86_BUILTIN_CMPEQSD,
14384
  IX86_BUILTIN_CMPLTSD,
14385
  IX86_BUILTIN_CMPLESD,
14386
  IX86_BUILTIN_CMPNEQSD,
14387
  IX86_BUILTIN_CMPNLTSD,
14388
  IX86_BUILTIN_CMPNLESD,
14389
  IX86_BUILTIN_CMPORDSD,
14390
  IX86_BUILTIN_CMPUNORDSD,
14391
  IX86_BUILTIN_CMPNESD,
14392
 
14393
  IX86_BUILTIN_COMIEQSD,
14394
  IX86_BUILTIN_COMILTSD,
14395
  IX86_BUILTIN_COMILESD,
14396
  IX86_BUILTIN_COMIGTSD,
14397
  IX86_BUILTIN_COMIGESD,
14398
  IX86_BUILTIN_COMINEQSD,
14399
  IX86_BUILTIN_UCOMIEQSD,
14400
  IX86_BUILTIN_UCOMILTSD,
14401
  IX86_BUILTIN_UCOMILESD,
14402
  IX86_BUILTIN_UCOMIGTSD,
14403
  IX86_BUILTIN_UCOMIGESD,
14404
  IX86_BUILTIN_UCOMINEQSD,
14405
 
14406
  IX86_BUILTIN_MAXPD,
14407
  IX86_BUILTIN_MAXSD,
14408
  IX86_BUILTIN_MINPD,
14409
  IX86_BUILTIN_MINSD,
14410
 
14411
  IX86_BUILTIN_ANDPD,
14412
  IX86_BUILTIN_ANDNPD,
14413
  IX86_BUILTIN_ORPD,
14414
  IX86_BUILTIN_XORPD,
14415
 
14416
  IX86_BUILTIN_SQRTPD,
14417
  IX86_BUILTIN_SQRTSD,
14418
 
14419
  IX86_BUILTIN_UNPCKHPD,
14420
  IX86_BUILTIN_UNPCKLPD,
14421
 
14422
  IX86_BUILTIN_SHUFPD,
14423
 
14424
  IX86_BUILTIN_LOADUPD,
14425
  IX86_BUILTIN_STOREUPD,
14426
  IX86_BUILTIN_MOVSD,
14427
 
14428
  IX86_BUILTIN_LOADHPD,
14429
  IX86_BUILTIN_LOADLPD,
14430
 
14431
  IX86_BUILTIN_CVTDQ2PD,
14432
  IX86_BUILTIN_CVTDQ2PS,
14433
 
14434
  IX86_BUILTIN_CVTPD2DQ,
14435
  IX86_BUILTIN_CVTPD2PI,
14436
  IX86_BUILTIN_CVTPD2PS,
14437
  IX86_BUILTIN_CVTTPD2DQ,
14438
  IX86_BUILTIN_CVTTPD2PI,
14439
 
14440
  IX86_BUILTIN_CVTPI2PD,
14441
  IX86_BUILTIN_CVTSI2SD,
14442
  IX86_BUILTIN_CVTSI642SD,
14443
 
14444
  IX86_BUILTIN_CVTSD2SI,
14445
  IX86_BUILTIN_CVTSD2SI64,
14446
  IX86_BUILTIN_CVTSD2SS,
14447
  IX86_BUILTIN_CVTSS2SD,
14448
  IX86_BUILTIN_CVTTSD2SI,
14449
  IX86_BUILTIN_CVTTSD2SI64,
14450
 
14451
  IX86_BUILTIN_CVTPS2DQ,
14452
  IX86_BUILTIN_CVTPS2PD,
14453
  IX86_BUILTIN_CVTTPS2DQ,
14454
 
14455
  IX86_BUILTIN_MOVNTI,
14456
  IX86_BUILTIN_MOVNTPD,
14457
  IX86_BUILTIN_MOVNTDQ,
14458
 
14459
  /* SSE2 MMX */
14460
  IX86_BUILTIN_MASKMOVDQU,
14461
  IX86_BUILTIN_MOVMSKPD,
14462
  IX86_BUILTIN_PMOVMSKB128,
14463
 
14464
  IX86_BUILTIN_PACKSSWB128,
14465
  IX86_BUILTIN_PACKSSDW128,
14466
  IX86_BUILTIN_PACKUSWB128,
14467
 
14468
  IX86_BUILTIN_PADDB128,
14469
  IX86_BUILTIN_PADDW128,
14470
  IX86_BUILTIN_PADDD128,
14471
  IX86_BUILTIN_PADDQ128,
14472
  IX86_BUILTIN_PADDSB128,
14473
  IX86_BUILTIN_PADDSW128,
14474
  IX86_BUILTIN_PADDUSB128,
14475
  IX86_BUILTIN_PADDUSW128,
14476
  IX86_BUILTIN_PSUBB128,
14477
  IX86_BUILTIN_PSUBW128,
14478
  IX86_BUILTIN_PSUBD128,
14479
  IX86_BUILTIN_PSUBQ128,
14480
  IX86_BUILTIN_PSUBSB128,
14481
  IX86_BUILTIN_PSUBSW128,
14482
  IX86_BUILTIN_PSUBUSB128,
14483
  IX86_BUILTIN_PSUBUSW128,
14484
 
14485
  IX86_BUILTIN_PAND128,
14486
  IX86_BUILTIN_PANDN128,
14487
  IX86_BUILTIN_POR128,
14488
  IX86_BUILTIN_PXOR128,
14489
 
14490
  IX86_BUILTIN_PAVGB128,
14491
  IX86_BUILTIN_PAVGW128,
14492
 
14493
  IX86_BUILTIN_PCMPEQB128,
14494
  IX86_BUILTIN_PCMPEQW128,
14495
  IX86_BUILTIN_PCMPEQD128,
14496
  IX86_BUILTIN_PCMPGTB128,
14497
  IX86_BUILTIN_PCMPGTW128,
14498
  IX86_BUILTIN_PCMPGTD128,
14499
 
14500
  IX86_BUILTIN_PMADDWD128,
14501
 
14502
  IX86_BUILTIN_PMAXSW128,
14503
  IX86_BUILTIN_PMAXUB128,
14504
  IX86_BUILTIN_PMINSW128,
14505
  IX86_BUILTIN_PMINUB128,
14506
 
14507
  IX86_BUILTIN_PMULUDQ,
14508
  IX86_BUILTIN_PMULUDQ128,
14509
  IX86_BUILTIN_PMULHUW128,
14510
  IX86_BUILTIN_PMULHW128,
14511
  IX86_BUILTIN_PMULLW128,
14512
 
14513
  IX86_BUILTIN_PSADBW128,
14514
  IX86_BUILTIN_PSHUFHW,
14515
  IX86_BUILTIN_PSHUFLW,
14516
  IX86_BUILTIN_PSHUFD,
14517
 
14518
  IX86_BUILTIN_PSLLW128,
14519
  IX86_BUILTIN_PSLLD128,
14520
  IX86_BUILTIN_PSLLQ128,
14521
  IX86_BUILTIN_PSRAW128,
14522
  IX86_BUILTIN_PSRAD128,
14523
  IX86_BUILTIN_PSRLW128,
14524
  IX86_BUILTIN_PSRLD128,
14525
  IX86_BUILTIN_PSRLQ128,
14526
  IX86_BUILTIN_PSLLDQI128,
14527
  IX86_BUILTIN_PSLLWI128,
14528
  IX86_BUILTIN_PSLLDI128,
14529
  IX86_BUILTIN_PSLLQI128,
14530
  IX86_BUILTIN_PSRAWI128,
14531
  IX86_BUILTIN_PSRADI128,
14532
  IX86_BUILTIN_PSRLDQI128,
14533
  IX86_BUILTIN_PSRLWI128,
14534
  IX86_BUILTIN_PSRLDI128,
14535
  IX86_BUILTIN_PSRLQI128,
14536
 
14537
  IX86_BUILTIN_PUNPCKHBW128,
14538
  IX86_BUILTIN_PUNPCKHWD128,
14539
  IX86_BUILTIN_PUNPCKHDQ128,
14540
  IX86_BUILTIN_PUNPCKHQDQ128,
14541
  IX86_BUILTIN_PUNPCKLBW128,
14542
  IX86_BUILTIN_PUNPCKLWD128,
14543
  IX86_BUILTIN_PUNPCKLDQ128,
14544
  IX86_BUILTIN_PUNPCKLQDQ128,
14545
 
14546
  IX86_BUILTIN_CLFLUSH,
14547
  IX86_BUILTIN_MFENCE,
14548
  IX86_BUILTIN_LFENCE,
14549
 
14550
  /* Prescott New Instructions.  */
14551
  IX86_BUILTIN_ADDSUBPS,
14552
  IX86_BUILTIN_HADDPS,
14553
  IX86_BUILTIN_HSUBPS,
14554
  IX86_BUILTIN_MOVSHDUP,
14555
  IX86_BUILTIN_MOVSLDUP,
14556
  IX86_BUILTIN_ADDSUBPD,
14557
  IX86_BUILTIN_HADDPD,
14558
  IX86_BUILTIN_HSUBPD,
14559
  IX86_BUILTIN_LDDQU,
14560
 
14561
  IX86_BUILTIN_MONITOR,
14562
  IX86_BUILTIN_MWAIT,
14563
 
14564
  IX86_BUILTIN_VEC_INIT_V2SI,
14565
  IX86_BUILTIN_VEC_INIT_V4HI,
14566
  IX86_BUILTIN_VEC_INIT_V8QI,
14567
  IX86_BUILTIN_VEC_EXT_V2DF,
14568
  IX86_BUILTIN_VEC_EXT_V2DI,
14569
  IX86_BUILTIN_VEC_EXT_V4SF,
14570
  IX86_BUILTIN_VEC_EXT_V4SI,
14571
  IX86_BUILTIN_VEC_EXT_V8HI,
14572
  IX86_BUILTIN_VEC_EXT_V16QI,
14573
  IX86_BUILTIN_VEC_EXT_V2SI,
14574
  IX86_BUILTIN_VEC_EXT_V4HI,
14575
  IX86_BUILTIN_VEC_SET_V8HI,
14576
  IX86_BUILTIN_VEC_SET_V4HI,
14577
 
14578
  IX86_BUILTIN_MAX
14579
};
14580
 
14581
#define def_builtin(MASK, NAME, TYPE, CODE)                             \
14582
do {                                                                    \
14583
  if ((MASK) & target_flags                                             \
14584
      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))                      \
14585
    lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,   \
14586
                                 NULL, NULL_TREE);                      \
14587
} while (0)
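/* Illustrative use of def_builtin, with a hypothetical builtin name
   and type node (the real registrations appear further down in this
   file):  */
#if 0
  def_builtin (MASK_SSE, "__builtin_ia32_example",
               v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);
#endif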
14588
 
14589
/* Bits for builtin_description.flag.  */
14590
 
14591
/* Set when we don't support the comparison natively, and should
14592
   swap_comparison in order to support it.  */
14593
#define BUILTIN_DESC_SWAP_OPERANDS      1
14594
 
struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;
};

static const struct builtin_description bdesc_comi[] =
{
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
 
static const struct builtin_description bdesc_2arg[] =
{
  /* SSE */
  { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
  { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
  { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
  { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },

  { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
  { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },

  { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_nandv4sf3,  "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
  { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
  { MASK_SSE, CODE_FOR_xorv4sf3,  "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movhlps,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movlhps,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },

  /* MMX */
  { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
  { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },

  /* Special.  */
  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },

  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },

  /* SSE2 */
  { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3,  "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3,  "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3,  "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3,  "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
    BUILTIN_DESC_SWAP_OPERANDS },
  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },

  { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2df3,  "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_xorv2df3,  "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_movsd,  "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },

  /* SSE2 MMX */
  { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },

  { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
  { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
  { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },

  /* SSE3 MMX */
  { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
};
 
static const struct builtin_description bdesc_1arg[] =
{
  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },

  /* SSE3 */
  { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
  { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
};
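/* Illustrative note (not part of the original source): entries above with
   a null name field are skipped by the generic registration loop (which
   tests d->name == 0) and are instead given explicit names and types by
   the def_builtin calls further down, e.g. "__builtin_ia32_sqrtps" for
   IX86_BUILTIN_SQRTPS.  */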
 
static void
ix86_init_builtins (void)
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}
 
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  size_t i;

  tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
  tree V2DI_type_node
    = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
  tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
  tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
  tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
  tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);

  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pcchar_type_node = build_pointer_type (
                             build_type_variant (char_type_node, 1, 0));
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pcfloat_type_node = build_pointer_type (
                             build_type_variant (float_type_node, 1, 0));
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
 
  /* Comparisons.  */
  tree int_ftype_v4sf_v4sf
    = build_function_type_list (integer_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v4si_ftype_v4sf_v4sf
    = build_function_type_list (V4SI_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type_list (integer_type_node,
                                V4SF_type_node, NULL_TREE);
  tree int64_ftype_v4sf
    = build_function_type_list (long_long_integer_type_node,
                                V4SF_type_node, NULL_TREE);
  tree int_ftype_v8qi
    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, integer_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int64
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, long_long_integer_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v4sf_v2si
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V2SI_type_node, NULL_TREE);

  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type_list (V8QI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v2si_v2si
    = build_function_type_list (V4HI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V4SF_type_node,
                                integer_type_node, NULL_TREE);
  tree v2si_ftype_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_di
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree v2si_ftype_v2si_di
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree void_ftype_void
    = build_function_type (void_type_node, void_list_node);
  tree void_ftype_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
  tree void_ftype_unsigned_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node,
                                unsigned_type_node, NULL_TREE);
  tree void_ftype_pcvoid_unsigned_unsigned
    = build_function_type_list (void_type_node, const_ptr_type_node,
                                unsigned_type_node, unsigned_type_node,
                                NULL_TREE);
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, void_list_node);
  tree v2si_ftype_v4sf
    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
  /* Loads/stores.  */
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type_list (void_type_node,
                                V8QI_type_node, V8QI_type_node,
                                pchar_type_node, NULL_TREE);
  tree v4sf_ftype_pcfloat
    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v4sf
    = build_function_type_list (void_type_node,
                                pv2si_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pfloat_v4sf
    = build_function_type_list (void_type_node,
                                pfloat_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pdi_di
    = build_function_type_list (void_type_node,
                                pdi_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree void_ftype_pv2di_v2di
    = build_function_type_list (void_type_node,
                                pv2di_type_node, V2DI_type_node, NULL_TREE);
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
 
  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type_list (V8QI_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree di_ftype_di_di
    = build_function_type_list (long_long_unsigned_type_node,
                                long_long_unsigned_type_node,
                                long_long_unsigned_type_node, NULL_TREE);

  tree v2si_ftype_v2sf
    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2si
    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf
    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type_list (V2SF_type_node,
                                V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2si_ftype_v2sf_v2sf
    = build_function_type_list (V2SI_type_node,
                                V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree pint_type_node    = build_pointer_type (integer_type_node);
  tree pdouble_type_node = build_pointer_type (double_type_node);
  tree pcdouble_type_node = build_pointer_type (
                                build_type_variant (double_type_node, 1, 0));
  tree int_ftype_v2df_v2df
    = build_function_type_list (integer_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);

  tree void_ftype_pcvoid
    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
  tree v4sf_ftype_v4si
    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v4sf
    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v4si
    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v2df
    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
  tree v2si_ftype_v2df
    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v2df
    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2si
    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
  tree v2df_ftype_v4sf
    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree int_ftype_v2df
    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
  tree int64_ftype_v2df
    = build_function_type_list (long_long_integer_type_node,
                                V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int64
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, long_long_integer_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v4sf_v2df
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v4sf
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df_int
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node,
                                integer_type_node,
                                NULL_TREE);
  tree v2df_ftype_v2df_pcdouble
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree void_ftype_pdouble_v2df
    = build_function_type_list (void_type_node,
                                pdouble_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pint_int
    = build_function_type_list (void_type_node,
                                pint_type_node, integer_type_node, NULL_TREE);
  tree void_ftype_v16qi_v16qi_pchar
    = build_function_type_list (void_type_node,
                                V16QI_type_node, V16QI_type_node,
                                pchar_type_node, NULL_TREE);
  tree v2df_ftype_pcdouble
    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi
    = build_function_type_list (V16QI_type_node,
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v4si
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree v2di_ftype_v2df_v2df
    = build_function_type_list (V2DI_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df
    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2di_ftype_v2di_int
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v4si_int
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v8hi_v8hi
    = build_function_type_list (V4SI_type_node,
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree di_ftype_v8qi_v8qi
    = build_function_type_list (long_long_unsigned_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree di_ftype_v2si_v2si
    = build_function_type_list (long_long_unsigned_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2di_ftype_v16qi_v16qi
    = build_function_type_list (V2DI_type_node,
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v2di_ftype_v4si_v4si
    = build_function_type_list (V2DI_type_node,
                                V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree int_ftype_v16qi
    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
  tree v16qi_ftype_pcchar
    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
  tree void_ftype_pchar_v16qi
    = build_function_type_list (void_type_node,
                                pchar_type_node, V16QI_type_node, NULL_TREE);
 
  tree float80_type;
  tree float128_type;
  tree ftype;

  /* The __float80 type.  */
  if (TYPE_MODE (long_double_type_node) == XFmode)
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                               "__float80");
  else
    {
      /* The __float80 type.  */
      float80_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float80_type) = 80;
      layout_type (float80_type);
      (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
    }

  if (TARGET_64BIT)
    {
      float128_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float128_type) = 128;
      layout_type (float128_type);
      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
    }

  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
         mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
        continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
        {
        case V16QImode:
          type = v16qi_ftype_v16qi_v16qi;
          break;
        case V8HImode:
          type = v8hi_ftype_v8hi_v8hi;
          break;
        case V4SImode:
          type = v4si_ftype_v4si_v4si;
          break;
        case V2DImode:
          type = v2di_ftype_v2di_v2di;
          break;
        case V2DFmode:
          type = v2df_ftype_v2df_v2df;
          break;
        case V4SFmode:
          type = v4sf_ftype_v4sf_v4sf;
          break;
        case V8QImode:
          type = v8qi_ftype_v8qi_v8qi;
          break;
        case V4HImode:
          type = v4hi_ftype_v4hi_v4hi;
          break;
        case V2SImode:
          type = v2si_ftype_v2si_v2si;
          break;
        case DImode:
          type = di_ftype_di_di;
          break;

        default:
          gcc_unreachable ();
        }

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_sse_maskcmpv4sf3
          || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
        type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_sse2_maskcmpv2df3
          || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
        type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }
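/* Illustrative example (not part of the original source): for the entry
   { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", ... } above,
   operand 1 of the addv4sf3 pattern has mode V4SFmode, so the loop
   registers the builtin with type v4sf_ftype_v4sf_v4sf, i.e.
   v4sf __builtin_ia32_addps (v4sf, v4sf).  The mask-generating compares
   are overridden to return an integer vector of the same width.  */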
 
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == MASK_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
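/* Illustrative note (not part of the original source): every comi/ucomi
   builtin is a scalar compare returning int, so the SSE entries get type
   int (v4sf, v4sf) and the SSE2 entries int (v2df, v2df), e.g.
   int __builtin_ia32_comieq (v4sf, v4sf).  */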
 
  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);

  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
 
  /* SSE2 */
  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);

  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);

  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);

  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);

  /* Prescott New Instructions.  */
  def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
               void_ftype_pcvoid_unsigned_unsigned,
               IX86_BUILTIN_MONITOR);
  def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
               void_ftype_unsigned_unsigned,
               IX86_BUILTIN_MWAIT);
  def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
               v4sf_ftype_v4sf,
               IX86_BUILTIN_MOVSHDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
               v4sf_ftype_v4sf,
               IX86_BUILTIN_MOVSLDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
               v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);

  /* Access to the vec_init patterns.  */
  ftype = build_function_type_list (V2SI_type_node, integer_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
               ftype, IX86_BUILTIN_VEC_INIT_V2SI);
 
15527
  ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
15528
                                    short_integer_type_node,
15529
                                    short_integer_type_node,
15530
                                    short_integer_type_node, NULL_TREE);
15531
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
15532
               ftype, IX86_BUILTIN_VEC_INIT_V4HI);
15533
 
15534
  ftype = build_function_type_list (V8QI_type_node, char_type_node,
15535
                                    char_type_node, char_type_node,
15536
                                    char_type_node, char_type_node,
15537
                                    char_type_node, char_type_node,
15538
                                    char_type_node, NULL_TREE);
15539
  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
15540
               ftype, IX86_BUILTIN_VEC_INIT_V8QI);
15541
 
15542
  /* Access to the vec_extract patterns.  */
15543
  ftype = build_function_type_list (double_type_node, V2DF_type_node,
15544
                                    integer_type_node, NULL_TREE);
15545
  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2df",
15546
               ftype, IX86_BUILTIN_VEC_EXT_V2DF);
15547
 
15548
  ftype = build_function_type_list (long_long_integer_type_node,
15549
                                    V2DI_type_node, integer_type_node,
15550
                                    NULL_TREE);
15551
  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2di",
15552
               ftype, IX86_BUILTIN_VEC_EXT_V2DI);
15553
 
15554
  ftype = build_function_type_list (float_type_node, V4SF_type_node,
15555
                                    integer_type_node, NULL_TREE);
15556
  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
15557
               ftype, IX86_BUILTIN_VEC_EXT_V4SF);
15558
 
15559
  ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15560
                                    integer_type_node, NULL_TREE);
15561
  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v4si",
15562
               ftype, IX86_BUILTIN_VEC_EXT_V4SI);
15563
 
15564
  ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15565
                                    integer_type_node, NULL_TREE);
15566
  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v8hi",
15567
               ftype, IX86_BUILTIN_VEC_EXT_V8HI);
15568
 
15569
  ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
15570
                                    integer_type_node, NULL_TREE);
15571
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
15572
               ftype, IX86_BUILTIN_VEC_EXT_V4HI);
15573
 
15574
  ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
15575
                                    integer_type_node, NULL_TREE);
15576
  def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
15577
               ftype, IX86_BUILTIN_VEC_EXT_V2SI);
15578
 
15579
  ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
15580
                                    integer_type_node, NULL_TREE);
15581
  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
15582
 
15583
  /* Access to the vec_set patterns.  */
15584
  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15585
                                    intHI_type_node,
15586
                                    integer_type_node, NULL_TREE);
15587
  def_builtin (MASK_SSE2, "__builtin_ia32_vec_set_v8hi",
15588
               ftype, IX86_BUILTIN_VEC_SET_V8HI);
15589
 
15590
  ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
15591
                                    intHI_type_node,
15592
                                    integer_type_node, NULL_TREE);
15593
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
15594
               ftype, IX86_BUILTIN_VEC_SET_V4HI);
15595
}
15596
 
15597
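/* Editorial note (illustrative, not from the original sources): each
   def_builtin call above registers one builtin under the given name,
   gated on the MASK_* ISA flags.  E.g. __builtin_ia32_sqrtpd is only
   defined when -msse2 is in effect; user code normally reaches it as

     __m128d r = __builtin_ia32_sqrtpd (x);

   via the _mm_sqrt_pd intrinsic in emmintrin.h.  */
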
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}

/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat, xops[3];
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* The insn must want input operands in the same modes as the
     result.  */
  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
              && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* ??? Using ix86_fixup_binary_operands is problematic when
     we've got mismatched modes.  Fake it.  */

  xops[0] = target;
  xops[1] = op0;
  xops[2] = op1;

  if (tmode == mode0 && tmode == mode1)
    {
      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
      op0 = xops[1];
      op1 = xops[2];
    }
  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
    {
      op0 = force_reg (mode0, op0);
      op1 = force_reg (mode1, op1);
      target = gen_reg_rtx (tmode);
    }

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

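/* Editorial sketch (illustrative): a two-operand builtin such as

     __v8hi r = __builtin_ia32_paddw128 (a, b);

   reaches ix86_expand_binop_builtin through the bdesc_2arg table,
   with icode == CODE_FOR_addv8hi3 (an assumption about that table
   entry); the helper forces A and B into operands that satisfy the
   insn predicates and emits the single add pattern.  */
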
/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (enum insn_code icode, tree arglist)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}

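/* Editorial sketch (illustrative): __builtin_ia32_storeupd (p, v) is
   routed here with icode == CODE_FOR_sse2_movupd (see the STOREUPD
   case in ix86_expand_builtin below): P becomes the MEM destination,
   V is forced into a register, and the store pattern is emitted
   purely for its side effect, so the function always returns 0.  */
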
/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
                          rtx target, int do_load)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
        op0 = safe_vector_operand (op0, mode0);

      if ((optimize && !register_operand (op0, mode0))
          || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

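/* Editorial sketch (illustrative): __builtin_ia32_loadups (p) arrives
   here with DO_LOAD == 1 (see the LOADUPS case below), so P is wrapped
   in a MEM and loaded via the sse_movups pattern; value unops such as
   __builtin_ia32_sqrtps come through the bdesc_1arg loop with
   DO_LOAD == 0 and operate on the expanded value directly.  */
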
/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

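/* Editorial note (illustrative): OP0 is duplicated into OP1 above
   because the vmsqrt/vmrsqrt/vmrcp patterns are merge operations that
   compute only the low element and copy the remaining elements from
   their second operand; __builtin_ia32_sqrtss (v) therefore supplies
   V for both inputs.  */
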
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
                         rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

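/* Editorial note (illustrative): the SSE cmpps/cmpsd encodings only
   provide the LT/LE-style predicates, so a descriptor such as the one
   for __builtin_ia32_cmpgtps carries BUILTIN_DESC_SWAP_OPERANDS with a
   comparison code of LT; the swap above rewrites a > b as b < a before
   the maskcmp pattern is emitted.  */
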
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
                      rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          SET_DEST (pat),
                                          const0_rtx)));

  return SUBREG_REG (target);
}

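/* Editorial sketch (illustrative): for e.g. __builtin_ia32_comieq (a, b)
   the comi pattern emitted above only sets the flags; the following SET
   with STRICT_LOW_PART then materializes the (comparison flags 0) result
   into the low byte of a zero-initialized SImode register, which becomes
   the int value of the builtin.  */
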
/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static int
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}

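/* Editorial note (illustrative): for a V4SF argument
   TYPE_VECTOR_SUBPARTS is 4, so the selector passed to
   __builtin_ia32_vec_ext_v4sf must be a literal in 0..3; anything
   else is diagnosed above and element 0 is substituted so that
   expansion can continue.  */
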
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that give us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));

  for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
    {
      rtx x = expand_normal (TREE_VALUE (arglist));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  gcc_assert (arglist == NULL);

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}

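/* Editorial sketch (illustrative):

     __builtin_ia32_vec_init_v4hi (a, b, c, d)

   collects the four HImode arguments into an rtvec and hands the
   resulting PARALLEL to ix86_expand_vector_init, which chooses a
   construction sequence for the target.  */
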
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree arglist, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree arglist)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1, target;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
  op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  /* OP0 is the source of these builtin functions and shouldn't be
     modified.  Create a copy, use it and return it as target.  */
  target = gen_reg_rtx (tmode);
  emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);

  return target;
}

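/* Editorial note (illustrative): __builtin_ia32_vec_set_v8hi (v, x, 3)
   therefore yields a fresh copy of V with element 3 replaced by X;
   V itself is never modified, preserving the functional semantics of
   the builtin.  */
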
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                     enum machine_mode mode ATTRIBUTE_UNUSED,
                     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_mmx_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sse_sfence ());
      return 0;

    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
               ? CODE_FOR_mmx_maskmovq
               : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      op0 = force_reg (Pmode, op0);
      op0 = gen_rtx_MEM (mode1, op0);

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
               : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
               : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
               : CODE_FOR_sse2_loadlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = force_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (optimize || target == 0
          || GET_MODE (target) != tmode
          || !register_operand (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
               : CODE_FOR_sse_storelps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      op1 = force_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return const0_rtx;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_normal (TREE_VALUE (arglist));
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
               ? CODE_FOR_sse_shufps
               : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if ((optimize && !register_operand (op1, mode1))
          || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (optimize || target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
               : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
               : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
               : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSLLWI128:
      icode = CODE_FOR_ashlv8hi3;
      goto do_pshifti;
    case IX86_BUILTIN_PSLLDI128:
      icode = CODE_FOR_ashlv4si3;
      goto do_pshifti;
    case IX86_BUILTIN_PSLLQI128:
      icode = CODE_FOR_ashlv2di3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRAWI128:
      icode = CODE_FOR_ashrv8hi3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRADI128:
      icode = CODE_FOR_ashrv4si3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRLWI128:
      icode = CODE_FOR_lshrv8hi3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRLDI128:
      icode = CODE_FOR_lshrv4si3;
      goto do_pshifti;
    case IX86_BUILTIN_PSRLQI128:
      icode = CODE_FOR_lshrv2di3;
      goto do_pshifti;
    do_pshifti:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);

      if (GET_CODE (op1) != CONST_INT)
        {
          error ("shift must be an immediate");
          return const0_rtx;
        }
      if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
        op1 = GEN_INT (255);

      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_reg (op0);

      target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (!pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSLLW128:
      icode = CODE_FOR_ashlv8hi3;
      goto do_pshift;
    case IX86_BUILTIN_PSLLD128:
      icode = CODE_FOR_ashlv4si3;
      goto do_pshift;
    case IX86_BUILTIN_PSLLQ128:
      icode = CODE_FOR_ashlv2di3;
      goto do_pshift;
    case IX86_BUILTIN_PSRAW128:
      icode = CODE_FOR_ashrv8hi3;
      goto do_pshift;
    case IX86_BUILTIN_PSRAD128:
      icode = CODE_FOR_ashrv4si3;
      goto do_pshift;
    case IX86_BUILTIN_PSRLW128:
      icode = CODE_FOR_lshrv8hi3;
      goto do_pshift;
    case IX86_BUILTIN_PSRLD128:
      icode = CODE_FOR_lshrv4si3;
      goto do_pshift;
    case IX86_BUILTIN_PSRLQ128:
      icode = CODE_FOR_lshrv2di3;
      goto do_pshift;
    do_pshift:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);

      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_reg (op0);

      op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
        op1 = copy_to_reg (op1);

      target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (!pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSLLDQI128:
    case IX86_BUILTIN_PSRLDQI128:
      icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
               : CODE_FOR_sse2_lshrti3);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        {
          op0 = copy_to_reg (op0);
          op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
        }
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          error ("shift must be an immediate");
          return const0_rtx;
        }
      target = gen_reg_rtx (V2DImode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
                             op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_mmx_femms ());
      return NULL_RTX;

    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);

    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);

    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
        op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    case IX86_BUILTIN_LOADDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
    case IX86_BUILTIN_STOREDQU:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);

    case IX86_BUILTIN_MONITOR:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (!REG_P (op0))
        op0 = copy_to_mode_reg (Pmode, op0);
      if (!REG_P (op1))
        op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
        op2 = copy_to_mode_reg (SImode, op2);
      if (!TARGET_64BIT)
        emit_insn (gen_sse3_monitor (op0, op1, op2));
      else
        emit_insn (gen_sse3_monitor64 (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      if (!REG_P (op0))
        op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
        op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;

    case IX86_BUILTIN_LDDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
                                       target, 1);

    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V16QI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
      return ix86_expand_vec_ext_builtin (arglist, target);

    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
      return ix86_expand_vec_set_builtin (arglist);

    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
        /* Compares are treated specially.  */
        if (d->icode == CODE_FOR_sse_maskcmpv4sf3
            || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
            || d->icode == CODE_FOR_sse2_maskcmpv2df3
            || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
          return ix86_expand_sse_compare (d, arglist, target);

        return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  gcc_unreachable ();
}

/* Store OPERAND to memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  gcc_assert (reload_completed);
  if (TARGET_RED_ZONE)
    {
      result = gen_rtx_MEM (mode,
                            gen_rtx_PLUS (Pmode,
                                          stack_pointer_rtx,
                                          GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (!TARGET_RED_ZONE && TARGET_64BIT)
    {
      switch (mode)
        {
        case HImode:
        case SImode:
          operand = gen_lowpart (DImode, operand);
          /* FALLTHRU */
        case DImode:
          emit_insn (
                      gen_rtx_SET (VOIDmode,
                                   gen_rtx_MEM (DImode,
                                                gen_rtx_PRE_DEC (DImode,
                                                        stack_pointer_rtx)),
                                   operand));
          break;
        default:
          gcc_unreachable ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
        {
        case DImode:
          {
            rtx operands[2];
            split_di (&operand, 1, operands, operands + 1);
            emit_insn (
                        gen_rtx_SET (VOIDmode,
                                     gen_rtx_MEM (SImode,
                                                  gen_rtx_PRE_DEC (Pmode,
                                                        stack_pointer_rtx)),
                                     operands[1]));
            emit_insn (
                        gen_rtx_SET (VOIDmode,
                                     gen_rtx_MEM (SImode,
                                                  gen_rtx_PRE_DEC (Pmode,
                                                        stack_pointer_rtx)),
                                     operands[0]));
          }
          break;
        case HImode:
          /* Store HImodes as SImodes.  */
          operand = gen_lowpart (SImode, operand);
          /* FALLTHRU */
        case SImode:
          emit_insn (
                      gen_rtx_SET (VOIDmode,
                                   gen_rtx_MEM (GET_MODE (operand),
                                                gen_rtx_PRE_DEC (SImode,
                                                        stack_pointer_rtx)),
                                   operand));
          break;
        default:
          gcc_unreachable ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}

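/* Editorial summary (illustrative): with a red zone the operand is
   simply stored at sp - RED_ZONE_SIZE without touching the stack
   pointer; otherwise it is pushed (one DImode push on 64-bit, one or
   two SImode pushes on 32-bit), and the matching ix86_free_from_memory
   below must then be called to pop the slot again.  */
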
/* Free the operand from memory.  */
void
ix86_free_from_memory (enum machine_mode mode)
{
  if (!TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
        size = 8;
      else
        size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
         to a pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                            GEN_INT (size))));
    }
}

/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (rtx x, enum reg_class class)
{
  enum machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (class == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return class;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX or SSE register.  This is because there are no MMX/SSE instructions
     to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
    return NO_REGS;

  /* Prefer SSE regs only, if we can use them for math.  */
  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
    return SSE_CLASS_P (class) ? class : NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
        return class;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
         zero above.  We only want to wind up preferring 80387 registers if
         we plan on doing computation with them.  */
      if (TARGET_80387
          && standard_80387_constant_p (x))
        {
          /* Limit class to non-sse.  */
          if (class == FLOAT_SSE_REGS)
            return FLOAT_REGS;
          if (class == FP_TOP_SSE_REGS)
            return FP_TOP_REG;
          if (class == FP_SECOND_SSE_REGS)
            return FP_SECOND_REG;
          if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
            return class;
        }

      return NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int), which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (reg_class_subset_p (class, Q_REGS))
        return class;
      if (reg_class_subset_p (Q_REGS, class))
        return Q_REGS;
      return NO_REGS;
    }

  return class;
}

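/* Editorial note (illustrative): loading a nonzero constant such as
   1.0 into an SSE register is answered with NO_REGS above, which
   steers reload into placing the constant in the pool and loading it
   through a memory operand; only all-zero constants can be
   materialized directly in any class.  */
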
/* Discourage putting floating-point values in SSE registers unless
   SSE math is being used, and likewise for the 387 registers.  */
enum reg_class
ix86_preferred_output_reload_class (rtx x, enum reg_class class)
{
  enum machine_mode mode = GET_MODE (x);

  /* Restrict the output reload class to the register bank that we are doing
     math on.  If we would like not to return a subset of CLASS, reject this
     alternative: if reload cannot do this, it will still use its choice.  */
  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;

  if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
    {
      if (class == FP_TOP_SSE_REGS)
        return FP_TOP_REG;
      else if (class == FP_SECOND_SSE_REGS)
        return FP_SECOND_REG;
      else
        return FLOAT_CLASS_P (class) ? class : NO_REGS;
    }

  return class;
}

/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is a class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in a single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */

int
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
                              enum machine_mode mode, int strict)
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      gcc_assert (!strict);
      return true;
    }

  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
        return true;

      /* If the target says that inter-unit moves are more expensive
         than moving through memory, then don't generate them.  */
      if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
        return true;

      /* Between SSE and general, we have moves no larger than word size.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
        return true;

      /* ??? For the cost of one register reformat penalty, we could use
         the same instructions to move SFmode and DFmode data, but the
         relevant move patterns don't support those alternatives.  */
      if (mode == SFmode || mode == DFmode)
        return true;
    }

  return false;
}

/* Return true if the registers in CLASS cannot represent the change from
   modes FROM to TO.  */

bool
ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
                               enum reg_class class)
{
  if (from == to)
    return false;

  /* x87 registers can't do subreg at all, as all values are reformatted
     to extended precision.  */
  if (MAYBE_FLOAT_CLASS_P (class))
    return true;

  if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
    {
      /* Vector registers do not support QI or HImode loads.  If we don't
         disallow a change to these modes, reload will assume it's ok to
         drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
         the vec_dupv4hi pattern.  */
      if (GET_MODE_SIZE (from) < 4)
        return true;

      /* Vector registers do not support subreg with nonzero offsets, which
         are otherwise valid for integer registers.  Since we can't see
         whether we have a nonzero offset from here, prohibit all
         nonparadoxical subregs changing size.  */
      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
        return true;
    }

  return false;
}

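/* Editorial note (illustrative): for SSE/MMX classes a mode change
   away from HImode is rejected above because GET_MODE_SIZE (HImode)
   is less than 4, and a narrowing change such as V2DI -> SI is
   rejected because the new size is smaller; both would otherwise let
   reload assume subreg accesses these registers cannot honor.  */
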
/* Return the cost of moving data from a register in class CLASS1 to
16834
   one in class CLASS2.
16835
 
16836
   It is not required that the cost always equal 2 when FROM is the same as TO;
16837
   on some machines it is expensive to move between registers if they are not
16838
   general registers.  */
16839
 
16840
int
16841
ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
16842
                         enum reg_class class2)
16843
{
16844
  /* In case we require secondary memory, compute cost of the store followed
16845
     by load.  In order to avoid bad register allocation choices, we need
16846
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
16847
 
16848
  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
16849
    {
16850
      int cost = 1;
16851
 
16852
      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
16853
                   MEMORY_MOVE_COST (mode, class1, 1));
16854
      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
16855
                   MEMORY_MOVE_COST (mode, class2, 1));
16856
 
16857
      /* In case of copying from general_purpose_register we may emit multiple
16858
         stores followed by single load causing memory size mismatch stall.
16859
         Count this as arbitrarily high cost of 20.  */
16860
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
16861
        cost += 20;
16862
 
16863
      /* In the case of FP/MMX moves, the registers actually overlap, and we
16864
         have to switch modes in order to treat them differently.  */
16865
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
16866
          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
16867
        cost += 20;
16868
 
16869
      return cost;
16870
    }
16871
 
16872
  /* Moves between SSE/MMX and integer unit are expensive.  */
16873
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
16874
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16875
    return ix86_cost->mmxsse_to_integer;
16876
  if (MAYBE_FLOAT_CLASS_P (class1))
16877
    return ix86_cost->fp_move;
16878
  if (MAYBE_SSE_CLASS_P (class1))
16879
    return ix86_cost->sse_move;
16880
  if (MAYBE_MMX_CLASS_P (class1))
16881
    return ix86_cost->mmx_move;
16882
  return 2;
16883
}
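
/* Worked example (for illustration): if a DImode copy from GENERAL_REGS
   to SSE_REGS needs secondary memory on a 32-bit target, its cost is
   1 + MAX (int load, int store) + MAX (SSE load, SSE store), plus the
   penalty of 20 because the general side needs two registers for the
   value while the SSE side needs only one.  */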

/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */

bool
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* Only the flags registers can hold CCmode values, and they can hold
     nothing else.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
         out of SSE registers, even when no operation instructions
         are available.  */
      return (VALID_SSE_REG_MODE (mode)
              || VALID_SSE2_REG_MODE (mode)
              || VALID_MMX_REG_MODE (mode)
              || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
         so if the register is available at all, then we can move data of
         the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
              || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  if (mode == QImode)
    {
      /* Take care with QImode values - they can live in non-QI regs,
         but then they cause partial register stalls.  */
      if (regno < 4 || TARGET_64BIT)
        return 1;
      if (!TARGET_PARTIAL_REG_STALL)
        return 1;
      return reload_in_progress || reload_completed;
    }
  /* We handle both integers and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode))
    return 1;
  else if (VALID_FP_MODE_P (mode))
    return 1;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return 1;

  return 0;
}
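
/* Worked example (for illustration): on a 32-bit target with
   TARGET_PARTIAL_REG_STALL set, a QImode value is allowed in %esi
   (regno >= 4) only once reload has started; before reload we steer
   pseudos toward the four QI-capable registers instead.  */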

/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   tieable integer mode.  */

static bool
ix86_tieable_integer_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case HImode:
    case SImode:
      return true;

    case QImode:
      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;

    case DImode:
      return TARGET_64BIT;

    default:
      return false;
    }
}

/* Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

bool
ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  if (mode1 == mode2)
    return true;

  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any other mode acceptable to SSE registers.  */
  if (GET_MODE_SIZE (mode2) >= 8
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);

  /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);

  return false;
}
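
/* Worked example (for illustration): the relation is deliberately
   asymmetric.  SFmode ties with DFmode (any register class holding a
   DFmode value can hold an SFmode one), but DFmode does not tie with
   SFmode, since SFmode falls through to the size checks above and is
   smaller than 8 bytes.  */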

/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   We also model the increased cost of moving QImode registers in
   non-Q_REGS classes.  */
int
ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
{
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
        {
          case SFmode:
            index = 0;
            break;
          case DFmode:
            index = 1;
            break;
          case XFmode:
            index = 2;
            break;
          default:
            return 100;
        }
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
          case 4:
            index = 0;
            break;
          case 8:
            index = 1;
            break;
          case 16:
            index = 2;
            break;
          default:
            return 100;
        }
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
          case 4:
            index = 0;
            break;
          case 8:
            index = 1;
            break;
          default:
            return 100;
        }
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
      case 1:
        if (in)
          return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
                  : ix86_cost->movzbl_load);
        else
          return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
                  : ix86_cost->int_store[0] + 4);
        break;
      case 2:
        return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
      default:
        /* Compute the number of 32-bit moves needed.  TFmode is moved
           as XFmode.  */
        if (mode == TFmode)
          mode = XFmode;
        return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
                * (((int) GET_MODE_SIZE (mode)
                    + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}
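
/* Worked example (for illustration): loading a DImode value into
   GENERAL_REGS on a 32-bit target falls into the default case above
   and costs int_load[2] * 2, since the 8-byte value needs two 32-bit
   (UNITS_PER_WORD == 4) moves.  */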

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
{
  enum machine_mode mode = GET_MODE (x);

  switch (code)
    {
    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
        *total = 3;
      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
        *total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x)
               && (!TARGET_64BIT
                   || (GET_CODE (x) != LABEL_REF
                       && (GET_CODE (x) != SYMBOL_REF
                           || !SYMBOL_REF_LOCAL_P (x)))))
        *total = 1;
      else
        *total = 0;
      return true;

    case CONST_DOUBLE:
      if (mode == VOIDmode)
        *total = 0;
      else
        switch (standard_80387_constant_p (x))
          {
          case 1: /* 0.0 */
            *total = 1;
            break;
          default: /* Other constants */
            *total = 2;
            break;
          case 0:
          case -1:
            /* Start with (MEM (SYMBOL_REF)), since that's where
               it'll probably end up.  Add a penalty for size.  */
            *total = (COSTS_N_INSNS (1)
                      + (flag_pic != 0 && !TARGET_64BIT)
                      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
            break;
          }
      return true;

    case ZERO_EXTEND:
      /* Zero extension is often completely free on x86_64, so make
         it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
          && GET_MODE (XEXP (x, 0)) == SImode)
        *total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
        *total = ix86_cost->add;
      else
        *total = ix86_cost->movzx;
      return false;

    case SIGN_EXTEND:
      *total = ix86_cost->movsx;
      return false;

    case ASHIFT:
      if (GET_CODE (XEXP (x, 1)) == CONST_INT
          && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
        {
          HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
          if (value == 1)
            {
              *total = ix86_cost->add;
              return false;
            }
          if ((value == 2 || value == 3)
              && ix86_cost->lea <= ix86_cost->shift_const)
            {
              *total = ix86_cost->lea;
              return false;
            }
        }
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
        {
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            {
              if (INTVAL (XEXP (x, 1)) > 32)
                *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
              else
                *total = ix86_cost->shift_const * 2;
            }
          else
            {
              if (GET_CODE (XEXP (x, 1)) == AND)
                *total = ix86_cost->shift_var * 2;
              else
                *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
            }
        }
      else
        {
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            *total = ix86_cost->shift_const;
          else
            *total = ix86_cost->shift_var;
        }
      return false;

    case MULT:
      if (FLOAT_MODE_P (mode))
        {
          *total = ix86_cost->fmul;
          return false;
        }
      else
        {
          rtx op0 = XEXP (x, 0);
          rtx op1 = XEXP (x, 1);
          int nbits;
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            {
              unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
              for (nbits = 0; value != 0; value &= value - 1)
                nbits++;
            }
          else
            /* This is arbitrary.  */
            nbits = 7;

          /* Compute costs correctly for widening multiplication.  */
          if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
              && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
                 == GET_MODE_SIZE (mode))
            {
              int is_mulwiden = 0;
              enum machine_mode inner_mode = GET_MODE (op0);

              if (GET_CODE (op0) == GET_CODE (op1))
                is_mulwiden = 1, op1 = XEXP (op1, 0);
              else if (GET_CODE (op1) == CONST_INT)
                {
                  if (GET_CODE (op0) == SIGN_EXTEND)
                    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
                                  == INTVAL (op1);
                  else
                    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
                }

              if (is_mulwiden)
                op0 = XEXP (op0, 0), mode = GET_MODE (op0);
            }

          *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
                    + nbits * ix86_cost->mult_bit
                    + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));

          return true;
        }
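
    /* Worked example (for illustration): multiplying by the constant 5
       (binary 101) gives nbits == 2 above, so the integer MULT cost is
       mult_init[MODE_INDEX (mode)] + 2 * mult_bit plus the operand
       costs; the loop counts set bits by clearing the lowest one on
       each iteration (value &= value - 1).  */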

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (FLOAT_MODE_P (mode))
        *total = ix86_cost->fdiv;
      else
        *total = ix86_cost->divide[MODE_INDEX (mode)];
      return false;

    case PLUS:
      if (FLOAT_MODE_P (mode))
        *total = ix86_cost->fadd;
      else if (GET_MODE_CLASS (mode) == MODE_INT
               && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
        {
          if (GET_CODE (XEXP (x, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
              && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
              && CONSTANT_P (XEXP (x, 1)))
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = ix86_cost->lea;
                  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
                  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
                                      outer_code);
                  *total += rtx_cost (XEXP (x, 1), outer_code);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == MULT
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = ix86_cost->lea;
                  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
                  *total += rtx_cost (XEXP (x, 1), outer_code);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == PLUS)
            {
              *total = ix86_cost->lea;
              *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
              *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
              *total += rtx_cost (XEXP (x, 1), outer_code);
              return true;
            }
        }
      /* FALLTHRU */

    case MINUS:
      if (FLOAT_MODE_P (mode))
        {
          *total = ix86_cost->fadd;
          return false;
        }
      /* FALLTHRU */

    case AND:
    case IOR:
    case XOR:
      if (!TARGET_64BIT && mode == DImode)
        {
          *total = (ix86_cost->add * 2
                    + (rtx_cost (XEXP (x, 0), outer_code)
                       << (GET_MODE (XEXP (x, 0)) != DImode))
                    + (rtx_cost (XEXP (x, 1), outer_code)
                       << (GET_MODE (XEXP (x, 1)) != DImode)));
          return true;
        }
      /* FALLTHRU */

    case NEG:
      if (FLOAT_MODE_P (mode))
        {
          *total = ix86_cost->fchs;
          return false;
        }
      /* FALLTHRU */

    case NOT:
      if (!TARGET_64BIT && mode == DImode)
        *total = ix86_cost->add * 2;
      else
        *total = ix86_cost->add;
      return false;

    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
          && XEXP (XEXP (x, 0), 1) == const1_rtx
          && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
          && XEXP (x, 1) == const0_rtx)
        {
          /* This kind of construct is implemented using test[bwl].
             Treat it as if we had an AND.  */
          *total = (ix86_cost->add
                    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
                    + rtx_cost (const1_rtx, outer_code));
          return true;
        }
      return false;

    case FLOAT_EXTEND:
      if (!TARGET_SSE_MATH
          || mode == XFmode
          || (mode == DFmode && !TARGET_SSE2))
        /* For standard 80387 constants, raise the cost to prevent
           compress_float_constant() from generating a load from
           memory.  */
        switch (standard_80387_constant_p (XEXP (x, 0)))
          {
          case -1:
          case 0:
            *total = 0;
            break;
          case 1: /* 0.0 */
            *total = 1;
            break;
          default:
            *total = (x86_ext_80387_constants & TUNEMASK
                      || optimize_size
                      ? 1 : 0);
          }
      return false;

    case ABS:
      if (FLOAT_MODE_P (mode))
        *total = ix86_cost->fabs;
      return false;

    case SQRT:
      if (FLOAT_MODE_P (mode))
        *total = ix86_cost->fsqrt;
      return false;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TP)
        *total = 0;
      return false;

    default:
      return false;
    }
}
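
/* Worked example (for illustration): for (plus:SI (mult:SI (reg)
   (const_int 4)) (reg)), the PLUS case above charges a single lea plus
   the operand costs, matching the scaled-index addressing form the
   insn will use.  */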

#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* For 64-bit we shouldn't get here.  */
  gcc_assert (!TARGET_64BIT);

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
      fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp\t*%%edx\n");
    }
  else
    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl\t%%eax\n");
    }
  else
    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");

  switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}

void
darwin_x86_file_end (void)
{
  darwin_file_end ();
  ix86_file_end ();
}
#endif /* TARGET_MACHO */

/* Order the registers for the register allocator.  */

void
x86_order_regs_for_local_alloc (void)
{
   int pos = 0;
   int i;

   /* First allocate the local general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && call_used_regs[i])
        reg_alloc_order [pos++] = i;

   /* Global general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
        reg_alloc_order [pos++] = i;

   /* x87 registers come first in case we are doing FP math
      using them.  */
   if (!TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   /* SSE registers.  */
   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
     reg_alloc_order [pos++] = i;
   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
     reg_alloc_order [pos++] = i;

   /* x87 registers.  */
   if (TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Initialize the rest of the array, as we do not allocate some
      registers at all.  */
   while (pos < FIRST_PSEUDO_REGISTER)
     reg_alloc_order [pos++] = 0;
}

/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */
static tree
ix86_handle_struct_attribute (tree *node, tree name,
                              tree args ATTRIBUTE_UNUSED,
                              int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
        type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
                 || TREE_CODE (*type) == UNION_TYPE)))
    {
      warning (OPT_Wattributes, "%qs attribute ignored",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  else if ((is_attribute_p ("ms_struct", name)
            && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
           || ((is_attribute_p ("gcc_struct", name)
                && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning (OPT_Wattributes, "%qs incompatible attribute ignored",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

static bool
ix86_ms_bitfield_layout_p (tree record_type)
{
  return (TARGET_MS_BITFIELD_LAYOUT &&
          !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
    || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
}

/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);

  if (TARGET_64BIT)
    {
      int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
    }

  if (ix86_function_regparm (type, function) > 0)
    {
      tree parm;

      parm = TYPE_ARG_TYPES (type);
      /* Figure out whether or not the function has a variable number of
         arguments.  */
      for (; parm; parm = TREE_CHAIN (parm))
        if (TREE_VALUE (parm) == void_type_node)
          break;
      /* If not, the this parameter is in the first argument.  */
      if (parm)
        {
          int regno = 0;
          if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
            regno = 2;
          return gen_rtx_REG (SImode, regno);
        }
    }

  if (aggregate_value_p (TREE_TYPE (type), type))
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
  else
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
}
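
/* Worked example (for illustration): on 32-bit, a regparm function gets
   `this' in %eax (regno 0) and a fastcall function gets it in %ecx
   (regno 2); otherwise `this' is at 4(%esp), or at 8(%esp) when a
   hidden aggregate-return pointer occupies the first stack slot.  */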

/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
                         HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
                         HOST_WIDE_INT vcall_offset, tree function)
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !(*targetm.binds_local_p) (function))
    return false;

  /* Otherwise ok.  */
  return true;
}

/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
                     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
                     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx xops[3];
  rtx this = x86_this_parameter (function);
  rtx this_reg, tmp;

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this))
    this_reg = this;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      xops[0] = this;
      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      xops[0] = GEN_INT (delta);
      xops[1] = this_reg ? this_reg : this;
      if (TARGET_64BIT)
        {
          if (!x86_64_general_operand (xops[0], DImode))
            {
              tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
              xops[1] = tmp;
              output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
              xops[0] = tmp;
              xops[1] = this;
            }
          output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
        }
      else
        output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      if (TARGET_64BIT)
        tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
      else
        {
          int tmp_regno = 2 /* ECX */;
          if (lookup_attribute ("fastcall",
              TYPE_ATTRIBUTES (TREE_TYPE (function))))
            tmp_regno = 0 /* EAX */;
          tmp = gen_rtx_REG (SImode, tmp_regno);
        }

      xops[0] = gen_rtx_MEM (Pmode, this_reg);
      xops[1] = tmp;
      if (TARGET_64BIT)
        output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
      else
        output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);

      /* Adjust the this parameter.  */
      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
        {
          rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
          xops[0] = GEN_INT (vcall_offset);
          xops[1] = tmp2;
          output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
          xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
        }
      xops[1] = this_reg;
      if (TARGET_64BIT)
        output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
      else
        output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this)
    {
      xops[0] = this_reg;
      xops[1] = this;
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }

  xops[0] = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
        output_asm_insn ("jmp\t%P0", xops);
      else
        {
          tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
          tmp = gen_rtx_CONST (Pmode, tmp);
          tmp = gen_rtx_MEM (QImode, tmp);
          xops[0] = tmp;
          output_asm_insn ("jmp\t%A0", xops);
        }
    }
  else
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
        output_asm_insn ("jmp\t%P0", xops);
      else
#if TARGET_MACHO
        if (TARGET_MACHO)
          {
            rtx sym_ref = XEXP (DECL_RTL (function), 0);
            tmp = (gen_rtx_SYMBOL_REF
                   (Pmode,
                    machopic_indirection_name (sym_ref, /*stub_p=*/true)));
            tmp = gen_rtx_MEM (QImode, tmp);
            xops[0] = tmp;
            output_asm_insn ("jmp\t%0", xops);
          }
        else
#endif /* TARGET_MACHO */
        {
          tmp = gen_rtx_REG (SImode, 2 /* ECX */);
          output_set_got (tmp, NULL_RTX);

          xops[1] = tmp;
          output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
          output_asm_insn ("jmp\t{*}%1", xops);
        }
    }
}

static void
x86_file_start (void)
{
  default_file_start ();
#if TARGET_MACHO
  darwin_file_start ();
#endif
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax\n", asm_out_file);
}

int
x86_field_alignment (tree field, int computed)
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
                    ? get_inner_array_type (type) : type);
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}

/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  if (TARGET_64BIT)
    if (flag_pic)
      {
#ifndef NO_PROFILE_COUNTERS
        fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
#endif
        fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
      }
    else
      {
#ifndef NO_PROFILE_COUNTERS
        fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
#endif
        fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
      }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
               LPREFIX, labelno, PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
               PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
    }
}

/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1-byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  */

static int
min_insn_size (rtx insn)
{
  int l = 0;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard the alignments we've emitted, and jump-table instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;
  if (GET_CODE (insn) == JUMP_INSN
      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
          || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in a row.  */
  if (GET_CODE (insn) == CALL_INSN
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  if (get_attr_length (insn) <= 1)
    return 1;

  /* For normal instructions we may rely on the sizes of addresses
     and the presence of a symbol to require 4 bytes of encoding.
     This is not the case for jumps where references are PC relative.  */
  if (GET_CODE (insn) != JUMP_INSN)
    {
      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
        l = 4;
    }
  if (l)
    return 1+l;
  else
    return 2;
}
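
/* Worked example (for illustration): a direct call to a named function
   is counted as exactly 5 bytes (opcode plus rel32), while an ordinary
   insn with a symbolic memory operand is counted as at least 1 + 4
   bytes for the opcode and the 4-byte displacement.  */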

/* The AMD K8 core mispredicts jumps when there are more than 3 jumps in
   a 16-byte window.  */

static void
ix86_avoid_jump_misspredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of the instructions in the interval, including INSN but not
     START.  When NBYTES is smaller than 16, it is possible that START
     and INSN end up in the same 16-byte window.

     The smallest offset at which INSN can start is when START ends at
     offset 0; INSN's offset is then NBYTES - sizeof (INSN).  We add a
     p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN).
     */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {

      nbytes += min_insn_size (insn);
      if (dump_file)
        fprintf (dump_file, "Insn %i estimated to %i bytes\n",
                 INSN_UID (insn), min_insn_size (insn));
      if ((GET_CODE (insn) == JUMP_INSN
           && GET_CODE (PATTERN (insn)) != ADDR_VEC
           && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
          || GET_CODE (insn) == CALL_INSN)
        njumps++;
      else
        continue;

      while (njumps > 3)
        {
          start = NEXT_INSN (start);
          if ((GET_CODE (start) == JUMP_INSN
               && GET_CODE (PATTERN (start)) != ADDR_VEC
               && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
              || GET_CODE (start) == CALL_INSN)
            njumps--, isjump = 1;
          else
            isjump = 0;
          nbytes -= min_insn_size (start);
        }
      gcc_assert (njumps >= 0);
      if (dump_file)
        fprintf (dump_file, "Interval %i to %i has %i bytes\n",
                 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
        {
          int padsize = 15 - nbytes + min_insn_size (insn);

          if (dump_file)
            fprintf (dump_file, "Padding insn %i by %i bytes!\n",
                     INSN_UID (insn), padsize);
          emit_insn_before (gen_align (GEN_INT (padsize)), insn);
        }
    }
}
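
/* Worked example (for illustration): with nbytes == 12 (including the
   current 2-byte jump, the fourth in the interval), padsize is
   15 - 12 + 2 == 5, so a p2align of up to 5 bytes is emitted before
   the jump to push it into the next 16-byte window.  */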

/* The AMD Athlon works faster
   when a RET is not the destination of a conditional jump and is not
   directly preceded by another jump instruction.  We avoid the penalty
   by inserting a NOP just before such RET instructions.  */
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      rtx prev;
      bool replace = false;

      if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
          || !maybe_hot_bb_p (bb))
        continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
        if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
          break;
      if (prev && GET_CODE (prev) == CODE_LABEL)
        {
          edge e;
          edge_iterator ei;

          FOR_EACH_EDGE (e, ei, bb->preds)
            if (EDGE_FREQUENCY (e) && e->src->index >= 0
                && !(e->flags & EDGE_FALLTHRU))
              replace = true;
        }
      if (!replace)
        {
          prev = prev_active_insn (ret);
          if (prev
              && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
                  || GET_CODE (prev) == CALL_INSN))
            replace = true;
          /* Empty functions get a branch mispredict even when the jump
             destination is not visible to us.  */
          if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
            replace = true;
        }
      if (replace)
        {
          emit_insn_before (gen_return_internal_long (), ret);
          delete_insn (ret);
        }
    }
}

/* Implement machine-specific optimizations.  We implement padding of
   returns for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte
   window.  */
static void
ix86_reorg (void)
{
  if (TARGET_PAD_RETURNS && optimize && !optimize_size)
    ix86_pad_returns ();
  if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
    ix86_avoid_jump_misspredicts ();
}

/* Return nonzero when a QImode register that must be encoded via a REX
   prefix is used.  */
bool
x86_extended_QIreg_mentioned_p (rtx insn)
{
  int i;
  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (REG_P (recog_data.operand[i])
        && REGNO (recog_data.operand[i]) >= 4)
       return true;
  return false;
}

/* Return nonzero when P points to a register encoded via a REX prefix.
   Called via for_each_rtx.  */
static int
extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
{
   unsigned int regno;
   if (!REG_P (*p))
     return 0;
   regno = REGNO (*p);
   return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}

/* Return true when INSN mentions a register that must be encoded using
   a REX prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
}

/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  i1 = gen_reg_rtx (Pmode);
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
  expand_float (f0, i0, 0);
  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
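
/* Worked example (for illustration): when the unsigned input u has its
   top bit set, it would float as a negative signed value, so we instead
   float (u >> 1) | (u & 1) - halving while keeping the low bit so the
   rounding comes out right - and then double the result via f0 + f0.  */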

/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAL.  Return true if successful.  */

static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
                                   rtx target, rtx val)
{
  enum machine_mode smode, wsmode, wvmode;
  rtx x;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
        return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      val = force_reg (GET_MODE_INNER (mode), val);
      x = gen_rtx_VEC_DUPLICATE (mode, val);
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4HImode:
      if (!mmx_ok)
        return false;
      if (TARGET_SSE || TARGET_3DNOW_A)
        {
          val = gen_lowpart (SImode, val);
          x = gen_rtx_TRUNCATE (HImode, val);
          x = gen_rtx_VEC_DUPLICATE (mode, x);
          emit_insn (gen_rtx_SET (VOIDmode, target, x));
          return true;
        }
      else
        {
          smode = HImode;
          wsmode = SImode;
          wvmode = V2SImode;
          goto widen;
        }

    case V8QImode:
      if (!mmx_ok)
        return false;
      smode = QImode;
      wsmode = HImode;
      wvmode = V4HImode;
      goto widen;
    case V8HImode:
      if (TARGET_SSE2)
        {
          rtx tmp1, tmp2;
          /* Extend HImode to SImode using a paradoxical SUBREG.  */
          tmp1 = gen_reg_rtx (SImode);
          emit_move_insn (tmp1, gen_lowpart (SImode, val));
          /* Insert the SImode value as the low element of a V4SImode
             vector.  */
          tmp2 = gen_reg_rtx (V4SImode);
          tmp1 = gen_rtx_VEC_MERGE (V4SImode,
                                    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
                                    CONST0_RTX (V4SImode),
                                    const1_rtx);
          emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
          /* Cast the V4SImode vector back to a V8HImode vector.  */
          tmp1 = gen_reg_rtx (V8HImode);
          emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
          /* Duplicate the low short through the whole low SImode word.  */
          emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
          /* Cast the V8HImode vector back to a V4SImode vector.  */
          tmp2 = gen_reg_rtx (V4SImode);
          emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
          /* Replicate the low element of the V4SImode vector.  */
          emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
          /* Cast the V4SImode vector back to V8HImode, and store in
             target.  */
          emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
          return true;
        }
      smode = HImode;
      wsmode = SImode;
      wvmode = V4SImode;
      goto widen;
    case V16QImode:
      if (TARGET_SSE2)
        {
          rtx tmp1, tmp2;
          /* Extend QImode to SImode using a paradoxical SUBREG.  */
          tmp1 = gen_reg_rtx (SImode);
          emit_move_insn (tmp1, gen_lowpart (SImode, val));
          /* Insert the SImode value as the low element of a V4SImode
             vector.  */
          tmp2 = gen_reg_rtx (V4SImode);
          tmp1 = gen_rtx_VEC_MERGE (V4SImode,
                                    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
                                    CONST0_RTX (V4SImode),
                                    const1_rtx);
          emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
          /* Cast the V4SImode vector back to a V16QImode vector.  */
          tmp1 = gen_reg_rtx (V16QImode);
          emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
          /* Duplicate the low byte through the whole low SImode word.  */
          emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
          emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
          /* Cast the V16QImode vector back to a V4SImode vector.  */
          tmp2 = gen_reg_rtx (V4SImode);
          emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
          /* Replicate the low element of the V4SImode vector.  */
          emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
          /* Cast the V4SImode vector back to V16QImode, and store in
             target.  */
          emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
          return true;
        }
      smode = QImode;
      wsmode = HImode;
      wvmode = V8HImode;
      goto widen;
    widen:
      /* Replicate the value once into the next wider mode and recurse.  */
      val = convert_modes (wsmode, smode, val, true);
      x = expand_simple_binop (wsmode, ASHIFT, val,
                               GEN_INT (GET_MODE_BITSIZE (smode)),
                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
      val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wvmode);
      if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
        gcc_unreachable ();
      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}
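
/* Worked example (for illustration): duplicating the QImode byte B into
   V8QImode without SSE first widens it via (B << 8) | B into an HImode
   scalar, then recurses to duplicate that into V4HImode, and finally
   reinterprets the result as V8QImode through gen_lowpart.  */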

/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose ONE_VAR element is VAR, and other elements are zero.  Return true
   if successful.  */

static bool
ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
                                     rtx target, rtx var, int one_var)
{
  enum machine_mode vsimode;
  rtx new_target;
  rtx x, tmp;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok)
        return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      if (one_var != 0)
        return false;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4SFmode:
    case V4SImode:
      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
        new_target = gen_reg_rtx (mode);
      else
        new_target = target;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
      if (one_var != 0)
        {
          /* We need to shuffle the value to the correct position, so
             create a new pseudo to store the intermediate result.  */

          /* With SSE2, we can use the integer shuffle insns.  */
          if (mode != V4SFmode && TARGET_SSE2)
            {
              emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
                                            GEN_INT (1),
                                            GEN_INT (one_var == 1 ? 0 : 1),
                                            GEN_INT (one_var == 2 ? 0 : 1),
                                            GEN_INT (one_var == 3 ? 0 : 1)));
              if (target != new_target)
                emit_move_insn (target, new_target);
              return true;
            }

          /* Otherwise convert the intermediate result to V4SFmode and
             use the SSE1 shuffle instructions.  */
          if (mode != V4SFmode)
            {
              tmp = gen_reg_rtx (V4SFmode);
              emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
            }
          else
            tmp = new_target;

          emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
                                       GEN_INT (1),
                                       GEN_INT (one_var == 1 ? 0 : 1),
                                       GEN_INT (one_var == 2 ? 0+4 : 1+4),
                                       GEN_INT (one_var == 3 ? 0+4 : 1+4)));

          if (mode != V4SFmode)
            emit_move_insn (target, gen_lowpart (V4SImode, tmp));
          else if (tmp != target)
            emit_move_insn (target, tmp);
        }
      else if (target != new_target)
        emit_move_insn (target, new_target);
      return true;

    case V8HImode:
    case V16QImode:
      vsimode = V4SImode;
      goto widen;
    case V4HImode:
    case V8QImode:
      if (!mmx_ok)
        return false;
      vsimode = V2SImode;
      goto widen;
    widen:
      if (one_var != 0)
        return false;

      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
                                                var, one_var))
        gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}

/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
                                 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  const_vec = copy_rtx (vals);
  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
    case V2SFmode:
    case V2SImode:
      /* For the two element vectors, it's just as easy to use
         the general case.  */
      return false;

    case V4SFmode:
    case V4SImode:
    case V8HImode:
    case V4HImode:
      break;

    case V16QImode:
      wmode = V8HImode;
      goto widen;
    case V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
         the variable value with its adjacent constant value, and
         promote to an HImode set.  */
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
        {
          var = convert_modes (HImode, QImode, var, true);
          var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
                                     NULL_RTX, 1, OPTAB_LIB_WIDEN);
          x = GEN_INT (INTVAL (x) & 0xff);
        }
      else
        {
          var = convert_modes (HImode, QImode, var, true);
          x = gen_int_mode (INTVAL (x) << 8, HImode);
        }
      if (x != const0_rtx)
        var = expand_simple_binop (HImode, IOR, var, x, var,
                                   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}
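
/* To illustrate the QImode widening above: for a V16QImode vector
   whose variable byte is at index 5, the partner is the constant
   byte at index 4 (5 ^ 1).  Since 5 is odd, the variable byte forms
   the high half of the combined HImode value, so it is shifted left
   by 8 and IORed with the constant low byte; the pair is then stored
   as halfword 2 (5 >> 1) of the V8HImode view.  This relies on the
   little-endian lane layout assumed throughout this file.  */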

/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
                                 rtx target, rtx vals)
{
  enum machine_mode half_mode = GET_MODE_INNER (mode);
  rtx op0 = NULL, op1 = NULL;
  bool use_vec_concat = false;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
        break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      /* For the two element vectors, we always implement VEC_CONCAT.  */
      op0 = XVECEXP (vals, 0, 0);
      op1 = XVECEXP (vals, 0, 1);
      use_vec_concat = true;
      break;

    case V4SFmode:
      half_mode = V2SFmode;
      goto half;
    case V4SImode:
      half_mode = V2SImode;
      goto half;
    half:
      {
        rtvec v;

        /* For V4SF and V4SI, we implement a concat of two V2 vectors.
           Recurse to load the two halves.  */

        op0 = gen_reg_rtx (half_mode);
        v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
        ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));

        op1 = gen_reg_rtx (half_mode);
        v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
        ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));

        use_vec_concat = true;
      }
      break;

    case V8HImode:
    case V16QImode:
    case V4HImode:
    case V8QImode:
      break;

    default:
      gcc_unreachable ();
    }

  if (use_vec_concat)
    {
      if (!register_operand (op0, half_mode))
        op0 = force_reg (half_mode, op0);
      if (!register_operand (op1, half_mode))
        op1 = force_reg (half_mode, op1);

      emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_rtx_VEC_CONCAT (mode, op0, op1)));
    }
  else
    {
      int i, j, n_elts, n_words, n_elt_per_word;
      enum machine_mode inner_mode;
      rtx words[4], shift;

      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

      for (i = 0; i < n_words; ++i)
        {
          rtx word = NULL_RTX;

          for (j = 0; j < n_elt_per_word; ++j)
            {
              rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
              elt = convert_modes (word_mode, inner_mode, elt, true);

              if (j == 0)
                word = elt;
              else
                {
                  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
                                              word, 1, OPTAB_LIB_WIDEN);
                  word = expand_simple_binop (word_mode, IOR, word, elt,
                                              word, 1, OPTAB_LIB_WIDEN);
                }
            }

          words[i] = word;
        }

      if (n_words == 1)
        emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)
        {
          rtx tmp = gen_reg_rtx (mode);
          emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
          emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
          emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
          emit_move_insn (target, tmp);
        }
      else if (n_words == 4)
        {
          rtx tmp = gen_reg_rtx (V4SImode);
          vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
          ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
          emit_move_insn (target, gen_lowpart (mode, tmp));
        }
      else
        gcc_unreachable ();
    }
}
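
/* A worked example of the word-packing loop above: initializing a
   V8HImode vector { e0, ..., e7 } on a 32-bit target gives
   n_words == 4 and n_elt_per_word == 2.  Word I is accumulated
   high element first, i.e. roughly (e(2I+1) << 16) | e(2I), and the
   four words are then recombined through the V4SImode recursion.  */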

/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
        n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
        all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
        all_same = false;
    }

  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
                                            XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */
  if (n_var == 1)
    {
      if (all_const_zero
          && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
                                                  XVECEXP (vals, 0, one_var),
                                                  one_var))
        return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
        return;
    }

  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}
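
/* The dispatch above is cheapest-first: an all-constant vector
   becomes a single constant-pool load, an all-identical vector a
   broadcast, a vector with exactly one non-constant element a pool
   load plus a single element store, and only the fully variable
   case falls through to the general element-packing code.  */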

void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_merge = false;
  rtx tmp;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (mmx_ok)
        {
          tmp = gen_reg_rtx (GET_MODE_INNER (mode));
          ix86_expand_vector_extract (true, tmp, target, 1 - elt);
          if (elt == 0)
            tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
          else
            tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
          emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
          return;
        }
      break;

    case V2DFmode:
    case V2DImode:
      {
        rtx op0, op1;

        /* For the two element vectors, we implement a VEC_CONCAT with
           the extraction of the other element.  */

        tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
        tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

        if (elt == 0)
          op0 = val, op1 = tmp;
        else
          op0 = tmp, op1 = val;

        tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
        emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      switch (elt)
        {
        case 0:
          use_vec_merge = true;
          break;

        case 1:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* target = A A B B */
          emit_insn (gen_sse_unpcklps (target, target, target));
          /* target = X A B B */
          ix86_expand_vector_set (false, target, val, 0);
          /* target = A X C D */
          emit_insn (gen_sse_shufps_1 (target, target, tmp,
                                       GEN_INT (1), GEN_INT (0),
                                       GEN_INT (2+4), GEN_INT (3+4)));
          return;

        case 2:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* tmp = X B C D */
          ix86_expand_vector_set (false, tmp, val, 0);
          /* target = A B X D */
          emit_insn (gen_sse_shufps_1 (target, target, tmp,
                                       GEN_INT (0), GEN_INT (1),
                                       GEN_INT (0+4), GEN_INT (3+4)));
          return;

        case 3:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* tmp = X B C D */
          ix86_expand_vector_set (false, tmp, val, 0);
          /* target = A B C X */
          emit_insn (gen_sse_shufps_1 (target, target, tmp,
                                       GEN_INT (0), GEN_INT (1),
                                       GEN_INT (2+4), GEN_INT (0+4)));
          return;

        default:
          gcc_unreachable ();
        }
      break;

    case V4SImode:
      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
        {
          use_vec_merge = true;
          break;
        }

      if (TARGET_SSE2)
        {
          /* With SSE2, use integer shuffles to swap element 0 and ELT,
             store into element 0, then shuffle them back.  */

          rtx order[4];

          order[0] = GEN_INT (elt);
          order[1] = const1_rtx;
          order[2] = const2_rtx;
          order[3] = GEN_INT (3);
          order[elt] = const0_rtx;

          emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
                                        order[1], order[2], order[3]));

          ix86_expand_vector_set (false, target, val, 0);

          emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
                                        order[1], order[2], order[3]));
        }
      else
        {
          /* For SSE1, we have to reuse the V4SF code.  */
          ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
                                  gen_lowpart (SFmode, val), elt);
        }
      return;

    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
    case V8QImode:
    default:
      break;
    }

  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}
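
/* The SSE2 path above works because the pshufd selector that swaps
   lanes 0 and ELT while fixing the other two lanes is its own
   inverse: applying it twice restores the original order, so the
   value stored into lane 0 in between ends up in lane ELT.  */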

void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
        break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;

    case V4SFmode:
      switch (elt)
        {
        case 0:
          tmp = vec;
          break;

        case 1:
        case 3:
          tmp = gen_reg_rtx (mode);
          emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
                                       GEN_INT (elt), GEN_INT (elt),
                                       GEN_INT (elt+4), GEN_INT (elt+4)));
          break;

        case 2:
          tmp = gen_reg_rtx (mode);
          emit_insn (gen_sse_unpckhps (tmp, vec, vec));
          break;

        default:
          gcc_unreachable ();
        }
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      if (TARGET_SSE2)
        {
          switch (elt)
            {
            case 0:
              tmp = vec;
              break;

            case 1:
            case 3:
              tmp = gen_reg_rtx (mode);
              emit_insn (gen_sse2_pshufd_1 (tmp, vec,
                                            GEN_INT (elt), GEN_INT (elt),
                                            GEN_INT (elt), GEN_INT (elt)));
              break;

            case 2:
              tmp = gen_reg_rtx (mode);
              emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
              break;

            default:
              gcc_unreachable ();
            }
          vec = tmp;
          use_vec_extr = true;
          elt = 0;
        }
      else
        {
          /* For SSE1, we have to reuse the V4SF code.  */
          ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
                                      gen_lowpart (V4SFmode, vec), elt);
          return;
        }
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == HImode)
        {
          tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
          target = gen_lowpart (SImode, target);
        }

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}
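
/* In both the set and extract routines the stack-temporary branch is
   the catch-all: the vector is spilled to memory, the element is
   accessed at byte offset ELT * GET_MODE_SIZE (inner_mode), and for
   the set case the whole vector is reloaded.  The shuffle paths
   above exist only to avoid that round trip through memory.  */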

/* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
   pattern to reduce; DEST is the destination; IN is the input vector.  */

void
ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx tmp1, tmp2, tmp3;

  tmp1 = gen_reg_rtx (V4SFmode);
  tmp2 = gen_reg_rtx (V4SFmode);
  tmp3 = gen_reg_rtx (V4SFmode);

  emit_insn (gen_sse_movhlps (tmp1, in, in));
  emit_insn (fn (tmp2, tmp1, in));

  emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
                               GEN_INT (1), GEN_INT (1),
                               GEN_INT (1+4), GEN_INT (1+4)));
  emit_insn (fn (dest, tmp2, tmp3));
}
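
/* A lane-level trace of the reduction, for IN = { a, b, c, d } and a
   commutative FN such as addition: movhlps gives
   tmp1 = { c, d, c, d }, the first FN gives tmp2 = { a+c, b+d, ... },
   the shufps broadcasts lane 1 so tmp3 = { b+d, ... }, and the final
   FN leaves the complete reduction a+b+c+d in lane 0 of DEST.  */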

/* Target hook for scalar_mode_supported_p.  */
static bool
ix86_scalar_mode_supported_p (enum machine_mode mode)
{
  if (DECIMAL_FLOAT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}

/* Implements target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}

/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
                      tree inputs ATTRIBUTE_UNUSED,
                      tree clobbers)
{
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
                        clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
                        clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
                        clobbers);
  return clobbers;
}
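
/* Every asm statement is therefore treated as clobbering the
   condition codes ("flags"), the FP status register ("fpsr") and
   the direction flag ("dirflag"); the integer passed to build_string
   is simply the length of each register name.  */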

/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
          || strcmp (section, ".lbss") == 0)
        return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
         in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
        return true;
    }

  return false;
}

static void
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}

/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  return (mode != CCFPmode && mode != CCFPUmode
          ? reverse_condition (code)
          : reverse_condition_maybe_unordered (code));
}

/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[1])
      && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
    {
      if (REGNO (operands[0]) == FIRST_STACK_REG)
        return output_387_ffreep (operands, 0);
      return "fstp\t%y0";
    }
  if (STACK_TOP_P (operands[0]))
    return "fld%z1\t%y1";
  return "fst\t%y0";
}
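
/* The templates above follow the x87 stack discipline: a source
   register that dies is popped (fstp, or ffreep when the destination
   is the stack top), a move whose destination is the stack top is a
   load (fld), and the remaining case is a plain fst store.  */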

/* Output code to perform a conditional jump to LABEL if the C2 flag
   in the FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_USE_SAHF)
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
                               gen_rtx_LABEL_REF (VOIDmode, label),
                               pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
  emit_jump_insn (temp);
}
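
/* Both branches test the C2 bit of the x87 status word, which fnstsw
   leaves in %ax: the mask 0x04 selects bit 2 of the high byte, i.e.
   status-word bit 10 (C2), while on TARGET_USE_SAHF machines sahf
   copies that byte into EFLAGS, where C2 lands in the parity flag
   and is matched by the UNORDERED test.  */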

/* Output code to perform a log1p XFmode calculation.  */

void
ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  emit_insn (gen_absxf2 (tmp, op1));
  emit_insn (gen_cmpxf (tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode)));
  emit_jump_insn (gen_bge (label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));

  emit_label (label2);
}
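
/* The comparison threshold is 1 - sqrt(2)/2, the bound below which
   fyl2xp1 guarantees full accuracy.  On the fast path log1p (x) is
   computed as ln (2) * log2 (1 + x) via fyl2xp1 without forming
   1 + x; otherwise the code adds 1 explicitly and uses fyl2x, which
   is safe because cancellation only matters for small |x|.  */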

/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
                                tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
               flags & SECTION_WRITE ? "aw" : "a");
      return;
    }
  default_elf_asm_named_section (name, flags, decl);
}

/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_fundamental_type (tree type)
{
  switch (TYPE_MODE (type))
    {
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}

/* For 32-bit code we can save PIC register setup by using the
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to set up any
   PIC register, so it is better to call __stack_chk_fail directly.  */

static tree
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
         ? default_external_stack_protect_fail ()
         : default_hidden_stack_protect_fail ();
}

/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */
int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
          || ix86_cmodel == CM_SMALL_PIC
          || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
        type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}
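
/* Worked example: with -fpic on a 32-bit target the result is
   DW_EH_PE_pcrel | DW_EH_PE_sdata4, additionally ORed with
   DW_EH_PE_indirect when the symbol is global and hence may be
   subject to dynamic relocation.  Without PIC, the small code model
   can encode everything as a 32-bit absolute udata4.  */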

#include "gt-i386.h"
