/* openrisc/trunk/gnu-src/gcc-4.5.1/gcc/config/pa/pa.c (OpenCores Subversion, rev 297) */
/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "tree.h"
#include "output.h"
#include "except.h"
#include "expr.h"
#include "optabs.h"
#include "reload.h"
#include "integrate.h"
#include "function.h"
#include "toplev.h"
#include "ggc.h"
#include "recog.h"
#include "predict.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "df.h"

/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
{
  enum machine_mode store_mode;
  enum machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
          && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}


#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif

static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static bool pa_handle_option (size_t, const char *, int);
static int hppa_address_cost (rtx, bool);
static bool hppa_rtx_costs (rtx, int, int, int *, bool);
static inline rtx force_mode (enum machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
static bool forward_branch_p (rtx);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx);
static int compute_clrmem_length (rtx);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
static int pa_adjust_cost (rtx, rtx, rtx, int);
static int pa_adjust_priority (rtx, int);
static int pa_issue_rate (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (enum machine_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, enum machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
static void pa_hpux_file_end (void);
#endif
#ifdef HPUX_LONG_DOUBLE_LIBRARY
static void pa_hpux_init_libfuncs (void);
#endif
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
                                  const_tree, bool);
static int pa_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
                                 tree, bool);
static struct machine_function * pa_init_machine_status (void);
static enum reg_class pa_secondary_reload (bool, rtx, enum reg_class,
                                           enum machine_mode,
                                           secondary_reload_info *);
static void pa_extra_live_on_entry (bitmap);
static enum machine_mode pa_promote_function_mode (const_tree,
                                                   enum machine_mode, int *,
                                                   const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);

/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;

/* Which cpu we are scheduling for.  */
enum processor_type pa_cpu = TARGET_SCHED_DEFAULT;

/* The UNIX standard to use for predefines and linking.  */
int flag_pa_unix = TARGET_HPUX_11_11 ? 1998 : TARGET_HPUX_10_10 ? 1995 : 1993;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;


/* Initialize the GCC target structure.  */

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#ifdef ASM_OUTPUT_EXTERNAL_REAL
#define TARGET_ASM_FILE_END pa_hpux_file_end
#else
#define TARGET_ASM_FILE_END output_deferred_plabels
#endif

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION pa_handle_option

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#ifdef HPUX_LONG_DOUBLE_LIBRARY
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
#endif

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_tls_referenced_p

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address

struct gcc_target targetm = TARGET_INITIALIZER;

/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
        {
          warning (0, "value of -mfixed-range must have form REG1-REG2");
          return;
        }
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
        *comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
        {
          warning (0, "unknown register name: %s", str);
          return;
        }

      last = decode_reg_name (dash + 1);
      if (last < 0)
        {
          warning (0, "unknown register name: %s", dash + 1);
          return;
        }

      *dash = '-';

      if (first > last)
        {
          warning (0, "%s-%s is an empty range", str, dash + 1);
          return;
        }

      for (i = first; i <= last; ++i)
        fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
        break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}
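
/* Usage sketch (illustrative, not from the original source):
   "-mfixed-range=fr4-fr7,fr10-fr11" walks the loop above twice and
   marks fr4..fr7 and fr10..fr11 both fixed and call-used.  If the
   option ends up fixing every register in FP_REG_FIRST..FP_REG_LAST,
   the FP register file is disabled outright via MASK_DISABLE_FPREGS.  */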

/* Implement TARGET_HANDLE_OPTION.  */

static bool
pa_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_mnosnake:
    case OPT_mpa_risc_1_0:
    case OPT_march_1_0:
      target_flags &= ~(MASK_PA_11 | MASK_PA_20);
      return true;

    case OPT_msnake:
    case OPT_mpa_risc_1_1:
    case OPT_march_1_1:
      target_flags &= ~MASK_PA_20;
      target_flags |= MASK_PA_11;
      return true;

    case OPT_mpa_risc_2_0:
    case OPT_march_2_0:
      target_flags |= MASK_PA_11 | MASK_PA_20;
      return true;

    case OPT_mschedule_:
      if (strcmp (arg, "8000") == 0)
        pa_cpu = PROCESSOR_8000;
      else if (strcmp (arg, "7100") == 0)
        pa_cpu = PROCESSOR_7100;
      else if (strcmp (arg, "700") == 0)
        pa_cpu = PROCESSOR_700;
      else if (strcmp (arg, "7100LC") == 0)
        pa_cpu = PROCESSOR_7100LC;
      else if (strcmp (arg, "7200") == 0)
        pa_cpu = PROCESSOR_7200;
      else if (strcmp (arg, "7300") == 0)
        pa_cpu = PROCESSOR_7300;
      else
        return false;
      return true;

    case OPT_mfixed_range_:
      fix_range (arg);
      return true;

#if TARGET_HPUX
    case OPT_munix_93:
      flag_pa_unix = 1993;
      return true;
#endif

#if TARGET_HPUX_10_10
    case OPT_munix_95:
      flag_pa_unix = 1995;
      return true;
#endif

#if TARGET_HPUX_11_11
    case OPT_munix_98:
      flag_pa_unix = 1998;
      return true;
#endif

    default:
      return true;
    }
}
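
/* Example (illustrative): "-mschedule=7100LC" reaches this hook as
   code == OPT_mschedule_ with arg == "7100LC" and sets
   pa_cpu = PROCESSOR_7100LC; an unrecognized string such as "9999"
   makes the hook return false, so the argument is rejected.  */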

void
override_options (void)
{
  /* Unconditional branches in the delay slot are not compatible with dwarf2
     call frame information.  There is no benefit in using this optimization
     on PA8000 and later processors.  */
  if (pa_cpu >= PROCESSOR_8000
      || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
      || flag_unwind_tables)
    target_flags &= ~MASK_JUMP_IN_DELAY;

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "-freorder-blocks-and-partition does not work "
              "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}

static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] =
    built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
  implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED]
    = implicit_built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
#endif
#if TARGET_HPUX_11
  if (built_in_decls [BUILT_IN_FINITE])
    set_user_assembler_name (built_in_decls [BUILT_IN_FINITE], "_Isfinite");
  if (built_in_decls [BUILT_IN_FINITEF])
    set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF], "_Isfinitef");
#endif
}

/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return GGC_CNEW (machine_function);
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
symbolic_expression_p (rtx x)
{

  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (symbolic_operand (x, VOIDmode));
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
cint_ok_for_move (HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi can be used.  */
  return (VAL_14_BITS_P (ival)
          || ldil_cint_p (ival)
          || zdepi_cint_p (ival));
}
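
/* Illustrative values for the three classes above (not from the
   original source): 5000 fits in a signed 14-bit immediate, so ldo can
   materialize it; 0x12345800 has its low 11 bits clear and no sign
   change on 32->64 bit extension, so ldil can; 0x3e0 is a short run of
   1s that zdepi can deposit.  */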

/* Return truth value of whether OP can be used as an operand in an
   adddi3 insn.  */
int
adddi3_operand (rtx op, enum machine_mode mode)
{
  return (register_operand (op, mode)
          || (GET_CODE (op) == CONST_INT
              && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
}

/* True iff the operand OP can be used as the destination operand of
   an integer store.  This also implies the operand could be used as
   the source operand of an integer load.  Symbolic, lo_sum and indexed
   memory operands are not allowed.  We accept reloading pseudos and
   other memory operands.  */
int
integer_store_memory_operand (rtx op, enum machine_mode mode)
{
  return ((reload_in_progress
           && REG_P (op)
           && REGNO (op) >= FIRST_PSEUDO_REGISTER
           && reg_renumber [REGNO (op)] < 0)
          || (GET_CODE (op) == MEM
              && (reload_in_progress || memory_address_p (mode, XEXP (op, 0)))
              && !symbolic_memory_operand (op, VOIDmode)
              && !IS_LO_SUM_DLT_ADDR_P (XEXP (op, 0))
              && !IS_INDEX_ADDR_P (XEXP (op, 0))));
}

/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
int
ldil_cint_p (HOST_WIDE_INT ival)
{
  HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);

  return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
}
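
/* Example (illustrative): 0x12345800 is accepted (low 11 bits zero,
   bits 31 and up all zero).  0x80000000 is rejected: its low bits are
   zero, but bit 31 is set while bits 32..63 are clear, so the value
   would change sign when extended from 32 to 64 bits.  */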

/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
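
/* Worked trace (illustrative): for x = 0x3e0, a run of five 1s shifted
   left by 5, lsb_mask = 0x20, x >> 4 = 0x3e, and
   t = (0x3e + 0x20) & ~0x1f = 0x40, a power of two, so x is accepted.  */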

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit patterns like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
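
/* Examples (illustrative): mask = 0xff (0...01...1) and
   mask = ~(unsigned HOST_WIDE_INT) 0xff (1...10...0) are both
   accepted: in each case ~mask is one contiguous run of 1s, and adding
   its lowest set bit clears the run.  mask = 0x5 (...0101) is rejected
   because ~mask contains two separate runs of 1s.  */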

/* True iff depi can be used to compute (reg | MASK).  */
int
ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
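
/* Examples (illustrative): mask = 0x0ff0, one contiguous run of 1s, is
   accepted (0x0ff0 + 0x10 = 0x1000, a power of two), so depi can set
   those bits in a single insn; mask = 0x0505 has scattered 1s and is
   rejected.  */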

/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

rtx
legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx insn;

      /* We do not want to go through the movXX expanders here since that
         would create recursion.

         Nor do we really want to call a generator for a named pattern
         since that requires multiple patterns if we want to support
         multiple word sizes.

         So instead we just emit the raw set, which avoids the movXX
         expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (VOIDmode, reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
         and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
        {
          /* Extract LABEL_REF.  */
          if (GET_CODE (orig) == CONST)
            orig = XEXP (XEXP (orig, 0), 0);
          /* Extract CODE_LABEL.  */
          orig = XEXP (orig, 0);
          add_reg_note (insn, REG_LABEL_OPERAND, orig);
          LABEL_NUSES (orig)++;
        }
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx insn, tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
         result.  This allows the sequence to be deleted when the final
         result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
                 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, mode))
        {
          /* Force function label into memory in word mode.  */
          orig = XEXP (force_const_mem (word_mode, orig), 0);
          /* Load plabel address from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                         gen_rtvec (1, orig),
                                                         UNSPEC_DLTIND14R)));
          emit_move_insn (reg, pic_ref);
          /* Now load address of function descriptor.  */
          pic_ref = gen_rtx_MEM (Pmode, reg);
        }
      else
        {
          /* Load symbol reference from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                         gen_rtvec (1, orig),
                                                         UNSPEC_DLTIND14R)));
        }

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
        return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
        {
          if (INT_14_BITS (orig))
            return plus_constant (base, INTVAL (orig));
          orig = force_reg (Pmode, orig);
        }
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}
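
/* Sketch of the sequence emitted for the plain SYMBOL_REF case above
   (illustrative, not normative): the HIGH/LO_SUM pair

     (set tmp_reg (plus pic_offset_table_rtx (high sym)))
     (set reg (mem (lo_sum tmp_reg (unspec [sym] UNSPEC_DLTIND14R))))

   loads the address of SYM from its DLT slot through the PIC register,
   roughly an addil plus a 14-bit displacement load at the assembly
   level.  */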

static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
                           LCT_CONST, Pmode, 1, arg, Pmode);

  return ret;
}

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, insn, tmp, t1, t2, tp;
  enum tls_model model = SYMBOL_REF_TLS_MODEL (addr);

  switch (model)
    {
      case TLS_MODEL_GLOBAL_DYNAMIC:
        tmp = gen_reg_rtx (Pmode);
        if (flag_pic)
          emit_insn (gen_tgd_load_pic (tmp, addr));
        else
          emit_insn (gen_tgd_load (tmp, addr));
        ret = hppa_tls_call (tmp);
        break;

      case TLS_MODEL_LOCAL_DYNAMIC:
        ret = gen_reg_rtx (Pmode);
        tmp = gen_reg_rtx (Pmode);
        start_sequence ();
        if (flag_pic)
          emit_insn (gen_tld_load_pic (tmp, addr));
        else
          emit_insn (gen_tld_load (tmp, addr));
        t1 = hppa_tls_call (tmp);
        insn = get_insns ();
        end_sequence ();
        t2 = gen_reg_rtx (Pmode);
        emit_libcall_block (insn, t2, t1,
                            gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                            UNSPEC_TLSLDBASE));
        emit_insn (gen_tld_offset_load (ret, addr, t2));
        break;

      case TLS_MODEL_INITIAL_EXEC:
        tp = gen_reg_rtx (Pmode);
        tmp = gen_reg_rtx (Pmode);
        ret = gen_reg_rtx (Pmode);
        emit_insn (gen_tp_load (tp));
        if (flag_pic)
          emit_insn (gen_tie_load_pic (tmp, addr));
        else
          emit_insn (gen_tie_load (tmp, addr));
        emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
        break;

      case TLS_MODEL_LOCAL_EXEC:
        tp = gen_reg_rtx (Pmode);
        ret = gen_reg_rtx (Pmode);
        emit_insn (gen_tp_load (tp));
        emit_insn (gen_tle_load (ret, addr, tp));
        break;

      default:
        gcc_unreachable ();
    }

  return ret;
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

        memory(X + <large int>)

   into:

        if (<large int> & mask) >= (mask + 1) / 2
          Y = (<large int> & ~mask) + mask + 1  Round up.
        else
          Y = (<large int> & ~mask)             Round down.
        Z = X + Y
        memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most MODE_FLOAT references will use FP
   registers and most MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)


   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
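
/* Worked example (illustrative): for memory (X + 0x12345) in an
   integer mode, mask = 0x3fff and 0x12345 & 0x3fff = 0x2345, which is
   at least halfway to the next boundary, so Y rounds up to
   (0x12345 & ~0x3fff) + 0x4000 = 0x14000.  The reference becomes
   memory ((X + 0x14000) + (0x12345 - 0x14000)), i.e. a base register
   Z = X + 0x14000 plus the 14-bit displacement -0x1cbb, and CSE can
   share Z among nearby references.  */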
946
 
947
rtx
948
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
949
                         enum machine_mode mode)
950
{
951
  rtx orig = x;
952
 
953
  /* We need to canonicalize the order of operands in unscaled indexed
954
     addresses since the code that checks if an address is valid doesn't
955
     always try both orders.  */
956
  if (!TARGET_NO_SPACE_REGS
957
      && GET_CODE (x) == PLUS
958
      && GET_MODE (x) == Pmode
959
      && REG_P (XEXP (x, 0))
960
      && REG_P (XEXP (x, 1))
961
      && REG_POINTER (XEXP (x, 0))
962
      && !REG_POINTER (XEXP (x, 1)))
963
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));
964
 
965
  if (PA_SYMBOL_REF_TLS_P (x))
966
    return legitimize_tls_address (x);
967
  else if (flag_pic)
968
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
969
 
970
  /* Strip off CONST.  */
971
  if (GET_CODE (x) == CONST)
972
    x = XEXP (x, 0);
973
 
974
  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
975
     That should always be safe.  */
976
  if (GET_CODE (x) == PLUS
977
      && GET_CODE (XEXP (x, 0)) == REG
978
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
979
    {
980
      rtx reg = force_reg (Pmode, XEXP (x, 1));
981
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
982
    }
983
 
984
  /* Note we must reject symbols which represent function addresses
985
     since the assembler/linker can't handle arithmetic on plabels.  */
986
  if (GET_CODE (x) == PLUS
987
      && GET_CODE (XEXP (x, 1)) == CONST_INT
988
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
989
           && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
990
          || GET_CODE (XEXP (x, 0)) == REG))
991
    {
992
      rtx int_part, ptr_reg;
993
      int newoffset;
994
      int offset = INTVAL (XEXP (x, 1));
995
      int mask;
996
 
997
      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
998
              ? (INT14_OK_STRICT ? 0x3fff : 0x1f) : 0x3fff);
999
 
1000
      /* Choose which way to round the offset.  Round up if we
1001
         are >= halfway to the next boundary.  */
1002
      if ((offset & mask) >= ((mask + 1) / 2))
1003
        newoffset = (offset & ~ mask) + mask + 1;
1004
      else
1005
        newoffset = (offset & ~ mask);
1006
 
1007
      /* If the newoffset will not fit in 14 bits (ldo), then
1008
         handling this would take 4 or 5 instructions (2 to load
1009
         the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
1010
         add the new offset and the SYMBOL_REF.)  Combine can
1011
         not handle 4->2 or 5->2 combinations, so do not create
1012
         them.  */
1013
      if (! VAL_14_BITS_P (newoffset)
1014
          && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
1015
        {
1016
          rtx const_part = plus_constant (XEXP (x, 0), newoffset);
1017
          rtx tmp_reg
1018
            = force_reg (Pmode,
1019
                         gen_rtx_HIGH (Pmode, const_part));
1020
          ptr_reg
1021
            = force_reg (Pmode,
1022
                         gen_rtx_LO_SUM (Pmode,
1023
                                         tmp_reg, const_part));
1024
        }
1025
      else
1026
        {
1027
          if (! VAL_14_BITS_P (newoffset))
1028
            int_part = force_reg (Pmode, GEN_INT (newoffset));
1029
          else
1030
            int_part = GEN_INT (newoffset);
1031
 
1032
          ptr_reg = force_reg (Pmode,
1033
                               gen_rtx_PLUS (Pmode,
1034
                                             force_reg (Pmode, XEXP (x, 0)),
1035
                                             int_part));
1036
        }
1037
      return plus_constant (ptr_reg, offset - newoffset);
1038
    }
1039
 
1040
  /* Handle (plus (mult (a) (shadd_constant)) (b)).  */
1041
 
1042
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
1043
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1044
      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
1045
      && (OBJECT_P (XEXP (x, 1))
1046
          || GET_CODE (XEXP (x, 1)) == SUBREG)
1047
      && GET_CODE (XEXP (x, 1)) != CONST)
1048
    {
1049
      int val = INTVAL (XEXP (XEXP (x, 0), 1));
1050
      rtx reg1, reg2;
1051
 
1052
      reg1 = XEXP (x, 1);
1053
      if (GET_CODE (reg1) != REG)
1054
        reg1 = force_reg (Pmode, force_operand (reg1, 0));
1055
 
1056
      reg2 = XEXP (XEXP (x, 0), 0);
1057
      if (GET_CODE (reg2) != REG)
1058
        reg2 = force_reg (Pmode, force_operand (reg2, 0));
1059
 
1060
      return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1061
                                             gen_rtx_MULT (Pmode,
1062
                                                           reg2,
1063
                                                           GEN_INT (val)),
1064
                                             reg1));
1065
    }
1066
 
1067
  /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
1068
 
1069
     Only do so for floating point modes since this is more speculative
1070
     and we lose if it's an integer store.  */
1071
  if (GET_CODE (x) == PLUS
1072
      && GET_CODE (XEXP (x, 0)) == PLUS
1073
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
1074
      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
1075
      && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
1076
      && (mode == SFmode || mode == DFmode))
1077
    {
1078
 
1079
      /* First, try and figure out what to use as a base register.  */
1080
      rtx reg1, reg2, base, idx, orig_base;
1081
 
1082
      reg1 = XEXP (XEXP (x, 0), 1);
1083
      reg2 = XEXP (x, 1);
1084
      base = NULL_RTX;
1085
      idx = NULL_RTX;
1086
 
1087
      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
1088
         then emit_move_sequence will turn on REG_POINTER so we'll know
1089
         it's a base register below.  */
1090
      if (GET_CODE (reg1) != REG)
1091
        reg1 = force_reg (Pmode, force_operand (reg1, 0));
1092
 
1093
      if (GET_CODE (reg2) != REG)
1094
        reg2 = force_reg (Pmode, force_operand (reg2, 0));
1095
 
1096
      /* Figure out what the base and index are.  */
1097
 
1098
      if (GET_CODE (reg1) == REG
1099
          && REG_POINTER (reg1))
1100
        {
1101
          base = reg1;
1102
          orig_base = XEXP (XEXP (x, 0), 1);
1103
          idx = gen_rtx_PLUS (Pmode,
1104
                              gen_rtx_MULT (Pmode,
1105
                                            XEXP (XEXP (XEXP (x, 0), 0), 0),
1106
                                            XEXP (XEXP (XEXP (x, 0), 0), 1)),
1107
                              XEXP (x, 1));
1108
        }
1109
      else if (GET_CODE (reg2) == REG
1110
               && REG_POINTER (reg2))
1111
        {
1112
          base = reg2;
1113
          orig_base = XEXP (x, 1);
1114
          idx = XEXP (x, 0);
1115
        }
1116
 
1117
      if (base == 0)
1118
        return orig;
1119
 
1120
      /* If the index adds a large constant, try to scale the
1121
         constant so that it can be loaded with only one insn.  */
1122
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1123
          && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
1124
                            / INTVAL (XEXP (XEXP (idx, 0), 1)))
1125
          && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
1126
        {
1127
          /* Divide the CONST_INT by the scale factor, then add it to A.  */
1128
          int val = INTVAL (XEXP (idx, 1));
1129
 
1130
          val /= INTVAL (XEXP (XEXP (idx, 0), 1));
1131
          reg1 = XEXP (XEXP (idx, 0), 0);
1132
          if (GET_CODE (reg1) != REG)
1133
            reg1 = force_reg (Pmode, force_operand (reg1, 0));
1134
 
1135
          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
1136
 
1137
          /* We can now generate a simple scaled indexed address.  */
1138
          return
1139
            force_reg
1140
              (Pmode, gen_rtx_PLUS (Pmode,
1141
                                    gen_rtx_MULT (Pmode, reg1,
1142
                                                  XEXP (XEXP (idx, 0), 1)),
1143
                                    base));
1144
        }
1145
 
1146
      /* If B + C is still a valid base register, then add them.  */
1147
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1148
          && INTVAL (XEXP (idx, 1)) <= 4096
1149
          && INTVAL (XEXP (idx, 1)) >= -4096)
1150
        {
1151
          int val = INTVAL (XEXP (XEXP (idx, 0), 1));
1152
          rtx reg1, reg2;
1153
 
1154
          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
1155
 
1156
          reg2 = XEXP (XEXP (idx, 0), 0);
1157
          if (GET_CODE (reg2) != CONST_INT)
1158
            reg2 = force_reg (Pmode, force_operand (reg2, 0));
1159
 
1160
          return force_reg (Pmode, gen_rtx_PLUS (Pmode,
1161
                                                 gen_rtx_MULT (Pmode,
1162
                                                               reg2,
1163
                                                               GEN_INT (val)),
1164
                                                 reg1));
1165
        }
1166
 
1167
      /* Get the index into a register, then add the base + index and
1168
         return a register holding the result.  */
1169
 
1170
      /* First get A into a register.  */
1171
      reg1 = XEXP (XEXP (idx, 0), 0);
1172
      if (GET_CODE (reg1) != REG)
1173
        reg1 = force_reg (Pmode, force_operand (reg1, 0));
1174
 
1175
      /* And get B into a register.  */
1176
      reg2 = XEXP (idx, 1);
1177
      if (GET_CODE (reg2) != REG)
1178
        reg2 = force_reg (Pmode, force_operand (reg2, 0));
1179
 
1180
      reg1 = force_reg (Pmode,
1181
                        gen_rtx_PLUS (Pmode,
1182
                                      gen_rtx_MULT (Pmode, reg1,
1183
                                                    XEXP (XEXP (idx, 0), 1)),
1184
                                      reg2));
1185
 
1186
      /* Add the result to our base register and return.  */
1187
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
1188
 
1189
    }
1190
 
1191
  /* Uh-oh.  We might have an address for x[n-100000].  This needs
1192
     special handling to avoid creating an indexed memory address
1193
     with x-100000 as the base.
1194
 
1195
     If the constant part is small enough, then it's still safe because
1196
     there is a guard page at the beginning and end of the data segment.
1197
 
1198
     Scaled references are common enough that we want to try and rearrange the
1199
     terms so that we can use indexing for these addresses too.  Only
1200
     do the optimization for floatint point modes.  */
1201
 
1202
  if (GET_CODE (x) == PLUS
1203
      && symbolic_expression_p (XEXP (x, 1)))
1204
    {
1205
      /* Ugly.  We modify things here so that the address offset specified
1206
         by the index expression is computed first, then added to x to form
1207
         the entire address.  */
1208
 
1209
      rtx regx1, regx2, regy1, regy2, y;
1210
 
1211
      /* Strip off any CONST.  */
1212
      y = XEXP (x, 1);
1213
      if (GET_CODE (y) == CONST)
1214
        y = XEXP (y, 0);
1215
 
1216
      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
1217
        {
1218
          /* See if this looks like
1219
                (plus (mult (reg) (shadd_const))
1220
                      (const (plus (symbol_ref) (const_int))))
1221
 
1222
             Where const_int is small.  In that case the const
1223
             expression is a valid pointer for indexing.
1224
 
1225
             If const_int is big, but can be divided evenly by shadd_const
1226
             and added to (reg).  This allows more scaled indexed addresses.  */
1227
          if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1228
              && GET_CODE (XEXP (x, 0)) == MULT
1229
              && GET_CODE (XEXP (y, 1)) == CONST_INT
1230
              && INTVAL (XEXP (y, 1)) >= -4096
1231
              && INTVAL (XEXP (y, 1)) <= 4095
1232
              && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1233
              && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1234
            {
1235
              int val = INTVAL (XEXP (XEXP (x, 0), 1));
1236
              rtx reg1, reg2;
1237
 
1238
              reg1 = XEXP (x, 1);
1239
              if (GET_CODE (reg1) != REG)
1240
                reg1 = force_reg (Pmode, force_operand (reg1, 0));
1241
 
1242
              reg2 = XEXP (XEXP (x, 0), 0);
1243
              if (GET_CODE (reg2) != REG)
1244
                reg2 = force_reg (Pmode, force_operand (reg2, 0));
1245
 
1246
              return force_reg (Pmode,
1247
                                gen_rtx_PLUS (Pmode,
1248
                                              gen_rtx_MULT (Pmode,
1249
                                                            reg2,
1250
                                                            GEN_INT (val)),
1251
                                              reg1));
1252
            }
1253
          else if ((mode == DFmode || mode == SFmode)
1254
                   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1255
                   && GET_CODE (XEXP (x, 0)) == MULT
1256
                   && GET_CODE (XEXP (y, 1)) == CONST_INT
1257
                   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
1258
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1259
                   && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1260
            {
1261
              regx1
1262
                = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1263
                                             / INTVAL (XEXP (XEXP (x, 0), 1))));
1264
              regx2 = XEXP (XEXP (x, 0), 0);
1265
              if (GET_CODE (regx2) != REG)
1266
                regx2 = force_reg (Pmode, force_operand (regx2, 0));
1267
              regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1268
                                                        regx2, regx1));
1269
              return
1270
                force_reg (Pmode,
1271
                           gen_rtx_PLUS (Pmode,
1272
                                         gen_rtx_MULT (Pmode, regx2,
1273
                                                       XEXP (XEXP (x, 0), 1)),
1274
                                         force_reg (Pmode, XEXP (y, 0))));
1275
            }
1276
          else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1277
                   && INTVAL (XEXP (y, 1)) >= -4096
1278
                   && INTVAL (XEXP (y, 1)) <= 4095)
1279
            {
1280
              /* This is safe because of the guard page at the
1281
                 beginning and end of the data space.  Just
1282
                 return the original address.  */
1283
              return orig;
1284
            }
1285
          else
1286
            {
1287
              /* Doesn't look like one we can optimize.  */
1288
              regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1289
              regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1290
              regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1291
              regx1 = force_reg (Pmode,
1292
                                 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1293
                                                 regx1, regy2));
1294
              return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
1295
            }
1296
        }
1297
    }
1298
 
1299
  return orig;
1300
}
1301
 
1302
/* For the HPPA, REG and REG+CONST is cost 0
1303
   and addresses involving symbolic constants are cost 2.
1304
 
1305
   PIC addresses are very expensive.
1306
 
1307
   It is no coincidence that this has the same structure
1308
   as GO_IF_LEGITIMATE_ADDRESS.  */
1309
 
1310
static int
1311
hppa_address_cost (rtx X,
1312
                   bool speed ATTRIBUTE_UNUSED)
1313
{
1314
  switch (GET_CODE (X))
1315
    {
1316
    case REG:
1317
    case PLUS:
1318
    case LO_SUM:
1319
      return 1;
1320
    case HIGH:
1321
      return 2;
1322
    default:
1323
      return 4;
1324
    }
1325
}
1326
 
1327
/* Compute a (partial) cost for rtx X.  Return true if the complete
1328
   cost has been computed, and false if subexpressions should be
1329
   scanned.  In either case, *TOTAL contains the cost result.  */
1330
 
1331
static bool
1332
hppa_rtx_costs (rtx x, int code, int outer_code, int *total,
1333
                bool speed ATTRIBUTE_UNUSED)
1334
{
1335
  switch (code)
1336
    {
1337
    case CONST_INT:
1338
      if (INTVAL (x) == 0)
1339
        *total = 0;
1340
      else if (INT_14_BITS (x))
1341
        *total = 1;
1342
      else
1343
        *total = 2;
1344
      return true;
1345
 
1346
    case HIGH:
1347
      *total = 2;
1348
      return true;
1349
 
1350
    case CONST:
1351
    case LABEL_REF:
1352
    case SYMBOL_REF:
1353
      *total = 4;
1354
      return true;
1355
 
1356
    case CONST_DOUBLE:
1357
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1358
          && outer_code != SET)
1359
        *total = 0;
1360
      else
1361
        *total = 8;
1362
      return true;
1363
 
1364
    case MULT:
1365
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1366
        *total = COSTS_N_INSNS (3);
1367
      else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1368
        *total = COSTS_N_INSNS (8);
1369
      else
1370
        *total = COSTS_N_INSNS (20);
1371
      return true;
1372
 
1373
    case DIV:
1374
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1375
        {
1376
          *total = COSTS_N_INSNS (14);
1377
          return true;
1378
        }
1379
      /* FALLTHRU */
1380
 
1381
    case UDIV:
1382
    case MOD:
1383
    case UMOD:
1384
      *total = COSTS_N_INSNS (60);
1385
      return true;
1386
 
1387
    case PLUS: /* this includes shNadd insns */
1388
    case MINUS:
1389
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1390
        *total = COSTS_N_INSNS (3);
1391
      else
1392
        *total = COSTS_N_INSNS (1);
1393
      return true;
1394
 
1395
    case ASHIFT:
1396
    case ASHIFTRT:
1397
    case LSHIFTRT:
1398
      *total = COSTS_N_INSNS (1);
1399
      return true;
1400
 
1401
    default:
1402
      return false;
1403
    }
1404
}
1405
 
1406
/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
1407
   new rtx with the correct mode.  */
1408
static inline rtx
1409
force_mode (enum machine_mode mode, rtx orig)
1410
{
1411
  if (mode == GET_MODE (orig))
1412
    return orig;
1413
 
1414
  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
1415
 
1416
  return gen_rtx_REG (mode, REGNO (orig));
1417
}
1418
 
1419
/* Return 1 if *X is a thread-local symbol.  */
1420
 
1421
static int
1422
pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
1423
{
1424
  return PA_SYMBOL_REF_TLS_P (*x);
1425
}
1426
 
1427
/* Return 1 if X contains a thread-local symbol.  */
1428
 
1429
bool
1430
pa_tls_referenced_p (rtx x)
1431
{
1432
  if (!TARGET_HAVE_TLS)
1433
    return false;
1434
 
1435
  return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0);
1436
}
1437
 
1438
/* Emit insns to move operands[1] into operands[0].
1439
 
1440
   Return 1 if we have written out everything that needs to be done to
1441
   do the move.  Otherwise, return 0 and the caller will emit the move
1442
   normally.
1443
 
1444
   Note SCRATCH_REG may not be in the proper mode depending on how it
1445
   will be used.  This routine is responsible for creating a new copy
1446
   of SCRATCH_REG in the proper mode.  */
1447
 
1448
int
1449
emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
1450
{
1451
  register rtx operand0 = operands[0];
1452
  register rtx operand1 = operands[1];
1453
  register rtx tem;
1454
 
1455
  /* We can only handle indexed addresses in the destination operand
1456
     of floating point stores.  Thus, we need to break out indexed
1457
     addresses from the destination operand.  */
1458
  if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1459
    {
1460
      gcc_assert (can_create_pseudo_p ());
1461
 
1462
      tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1463
      operand0 = replace_equiv_address (operand0, tem);
1464
    }
1465
 
1466
  /* On targets with non-equivalent space registers, break out unscaled
1467
     indexed addresses from the source operand before the final CSE.
1468
     We have to do this because the REG_POINTER flag is not correctly
1469
     carried through various optimization passes and CSE may substitute
1470
     a pseudo without the pointer set for one with the pointer set.  As
1471
     a result, we loose various opportunities to create insns with
1472
     unscaled indexed addresses.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (operand1) == MEM
      && GET_CODE (XEXP (operand1, 0)) == PLUS
      && REG_P (XEXP (XEXP (operand1, 0), 0))
      && REG_P (XEXP (XEXP (operand1, 0), 1)))
    operand1
      = replace_equiv_address (operand1,
                               copy_to_mode_reg (Pmode, XEXP (operand1, 0)));

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem[REGNO (operand0)];
  else if (scratch_reg
           && reload_in_progress && GET_CODE (operand0) == SUBREG
           && GET_CODE (SUBREG_REG (operand0)) == REG
           && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
     /* We must not alter SUBREG_BYTE (operand0) since that would confuse
        the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
                                 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
                                 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp);
    }

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem[REGNO (operand1)];
  else if (scratch_reg
           && reload_in_progress && GET_CODE (operand1) == SUBREG
           && GET_CODE (SUBREG_REG (operand1)) == REG
           && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
     /* We must not alter SUBREG_BYTE (operand1) since that would confuse
        the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
                                 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
                                 SUBREG_BYTE (operand1));
      operand1 = alter_subreg (&temp);
    }

  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
      && ((tem = find_replacement (&XEXP (operand0, 0)))
          != XEXP (operand0, 0)))
    operand0 = replace_equiv_address (operand0, tem);

  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
      && ((tem = find_replacement (&XEXP (operand1, 0)))
          != XEXP (operand1, 0)))
    operand1 = replace_equiv_address (operand1, tem);

  /* Handle secondary reloads for loads/stores of FP registers from
     REG+D addresses where D does not fit in 5 or 14 bits, including
     (subreg (mem (addr))) cases.  */
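  /* (Added illustration:) e.g. a DFmode access at %r30+16384 cannot be
     encoded directly, since the largest 14-bit signed displacement is
     8191; the address is instead built in SCRATCH_REG and the memory
     reference rewritten to use that register.  */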
  if (scratch_reg
      && fp_reg_operand (operand0, mode)
      && ((GET_CODE (operand1) == MEM
           && !memory_address_p ((GET_MODE_SIZE (mode) == 4 ? SFmode : DFmode),
                                 XEXP (operand1, 0)))
          || ((GET_CODE (operand1) == SUBREG
               && GET_CODE (XEXP (operand1, 0)) == MEM
               && !memory_address_p ((GET_MODE_SIZE (mode) == 4
                                      ? SFmode : DFmode),
                                     XEXP (XEXP (operand1, 0), 0))))))
    {
      if (GET_CODE (operand1) == SUBREG)
        operand1 = XEXP (operand1, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
         it in WORD_MODE regardless of what mode it was originally given
         to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
         scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand1, 0)))
        {
          emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
          emit_move_insn (scratch_reg,
                          gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
                                          Pmode,
                                          XEXP (XEXP (operand1, 0), 0),
                                          scratch_reg));
        }
      else
        emit_move_insn (scratch_reg, XEXP (operand1, 0));
      emit_insn (gen_rtx_SET (VOIDmode, operand0,
                              replace_equiv_address (operand1, scratch_reg)));
      return 1;
    }
  else if (scratch_reg
           && fp_reg_operand (operand1, mode)
           && ((GET_CODE (operand0) == MEM
                && !memory_address_p ((GET_MODE_SIZE (mode) == 4
                                        ? SFmode : DFmode),
                                       XEXP (operand0, 0)))
               || ((GET_CODE (operand0) == SUBREG)
                   && GET_CODE (XEXP (operand0, 0)) == MEM
                   && !memory_address_p ((GET_MODE_SIZE (mode) == 4
                                          ? SFmode : DFmode),
                                         XEXP (XEXP (operand0, 0), 0)))))
    {
      if (GET_CODE (operand0) == SUBREG)
        operand0 = XEXP (operand0, 0);

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
         it in WORD_MODE regardless of what mode it was originally given
         to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* D might not fit in 14 bits either; for such cases load D into
         scratch reg.  */
      if (!memory_address_p (Pmode, XEXP (operand0, 0)))
        {
          emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
          emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
                                                                        0)),
                                                       Pmode,
                                                       XEXP (XEXP (operand0, 0),
                                                             0),
                                                       scratch_reg));
        }
      else
        emit_move_insn (scratch_reg, XEXP (operand0, 0));
      emit_insn (gen_rtx_SET (VOIDmode,
                              replace_equiv_address (operand0, scratch_reg),
                              operand1));
      return 1;
    }
  /* Handle secondary reloads for loads of FP registers from constant
     expressions by forcing the constant into memory.

     Use scratch_reg to hold the address of the memory location.

     The proper fix is to change PREFERRED_RELOAD_CLASS to return
     NO_REGS when presented with a const_int and a register class
     containing only FP registers.  Doing so unfortunately creates
     more problems than it solves.  Fix this for 2.5.  */
  else if (scratch_reg
           && CONSTANT_P (operand1)
           && fp_reg_operand (operand0, mode))
    {
      rtx const_mem, xoperands[2];

      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
         it in WORD_MODE regardless of what mode it was originally given
         to us.  */
      scratch_reg = force_mode (word_mode, scratch_reg);

      /* Force the constant into memory and put the address of the
         memory location into scratch_reg.  */
      const_mem = force_const_mem (mode, operand1);
      xoperands[0] = scratch_reg;
      xoperands[1] = XEXP (const_mem, 0);
      emit_move_sequence (xoperands, Pmode, 0);

      /* Now load the destination register.  */
      emit_insn (gen_rtx_SET (mode, operand0,
                              replace_equiv_address (const_mem, scratch_reg)));
      return 1;
    }
  /* Handle secondary reloads for SAR.  These occur when trying to load
     the SAR from memory, FP register, or with a constant.  */
  else if (scratch_reg
           && GET_CODE (operand0) == REG
           && REGNO (operand0) < FIRST_PSEUDO_REGISTER
           && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
           && (GET_CODE (operand1) == MEM
               || GET_CODE (operand1) == CONST_INT
               || (GET_CODE (operand1) == REG
                   && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1))))))
    {
      /* D might not fit in 14 bits either; for such cases load D into
         scratch reg.  */
      if (GET_CODE (operand1) == MEM
          && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
        {
          /* We are reloading the address into the scratch register, so we
             want to make sure the scratch register is a full register.  */
          scratch_reg = force_mode (word_mode, scratch_reg);

          emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
          emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
                                                                        0)),
                                                       Pmode,
                                                       XEXP (XEXP (operand1, 0),
                                                             0),
                                                       scratch_reg));

          /* Now we are going to load the scratch register from memory;
             we want to load it in the same width as the original MEM,
             which must be the same as the width of the ultimate destination,
             OPERAND0.  */
          scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

          emit_move_insn (scratch_reg,
                          replace_equiv_address (operand1, scratch_reg));
        }
      else
        {
          /* We want to load the scratch register using the same mode as
             the ultimate destination.  */
          scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);

          emit_move_insn (scratch_reg, operand1);
        }

      /* And emit the insn to set the ultimate destination.  We know that
         the scratch register has the same mode as the destination at this
         point.  */
      emit_move_insn (operand0, scratch_reg);
      return 1;
    }
  /* Handle the most common case: storing into a register.  */
  else if (register_operand (operand0, mode))
    {
      if (register_operand (operand1, mode)
          || (GET_CODE (operand1) == CONST_INT
              && cint_ok_for_move (INTVAL (operand1)))
          || (operand1 == CONST0_RTX (mode))
          || (GET_CODE (operand1) == HIGH
              && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
          /* Only `general_operands' can come here, so MEM is ok.  */
          || GET_CODE (operand1) == MEM)
        {
          /* Various sets are created during RTL generation which don't
             have the REG_POINTER flag correctly set.  After the CSE pass,
             instruction recognition can fail if we don't consistently
             set this flag when performing register copies.  This should
             also improve the opportunities for creating insns that use
             unscaled indexing.  */
          if (REG_P (operand0) && REG_P (operand1))
            {
              if (REG_POINTER (operand1)
                  && !REG_POINTER (operand0)
                  && !HARD_REGISTER_P (operand0))
                copy_reg_pointer (operand0, operand1);
            }

          /* When MEMs are broken out, the REG_POINTER flag doesn't
             get set.  In some cases, we can set the REG_POINTER flag
             from the declaration for the MEM.  */
          if (REG_P (operand0)
              && GET_CODE (operand1) == MEM
              && !REG_POINTER (operand0))
            {
              tree decl = MEM_EXPR (operand1);

              /* Set the register pointer flag and register alignment
                 if the declaration for this memory reference is a
                 pointer type.  Fortran indirect argument references
                 are ignored.  */
              if (decl
                  && !(flag_argument_noalias > 1
                       && TREE_CODE (decl) == INDIRECT_REF
                       && TREE_CODE (TREE_OPERAND (decl, 0)) == PARM_DECL))
                {
                  tree type;

                  /* If this is a COMPONENT_REF, use the FIELD_DECL from
                     tree operand 1.  */
                  if (TREE_CODE (decl) == COMPONENT_REF)
                    decl = TREE_OPERAND (decl, 1);

                  type = TREE_TYPE (decl);
                  type = strip_array_types (type);

                  if (POINTER_TYPE_P (type))
                    {
                      int align;

                      type = TREE_TYPE (type);
                      /* Using TYPE_ALIGN_OK is rather conservative as
                         only the ada frontend actually sets it.  */
                      align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
                               : BITS_PER_UNIT);
                      mark_reg_pointer (operand0, align);
                    }
                }
            }

          emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
          return 1;
        }
    }
  else if (GET_CODE (operand0) == MEM)
    {
      if (mode == DFmode && operand1 == CONST0_RTX (mode)
          && !(reload_in_progress || reload_completed))
        {
          rtx temp = gen_reg_rtx (DFmode);

          emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
          emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
          return 1;
        }
      if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
        {
          /* Run this case quickly.  */
          emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
          return 1;
        }
      if (! (reload_in_progress || reload_completed))
        {
          operands[0] = validize_mem (operand0);
          operands[1] = operand1 = force_reg (mode, operand1);
        }
    }

  /* Simplify the source if we need to.
     Note we do have to handle function labels here, even though we do
     not consider them legitimate constants.  Loop optimizations can
     call the emit_move_xxx routines with one as a source.  */
  if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
      || function_label_operand (operand1, mode)
      || (GET_CODE (operand1) == HIGH
          && symbolic_operand (XEXP (operand1, 0), mode)))
    {
      int ishighonly = 0;

      if (GET_CODE (operand1) == HIGH)
        {
          ishighonly = 1;
          operand1 = XEXP (operand1, 0);
        }
      if (symbolic_operand (operand1, mode))
        {
          /* Argh.  The assembler and linker can't handle arithmetic
             involving plabels.

             So we force the plabel into memory, load operand0 from
             the memory location, then add in the constant part.  */
          if ((GET_CODE (operand1) == CONST
               && GET_CODE (XEXP (operand1, 0)) == PLUS
               && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
              || function_label_operand (operand1, mode))
            {
              rtx temp, const_part;

              /* Figure out what (if any) scratch register to use.  */
              if (reload_in_progress || reload_completed)
                {
                  scratch_reg = scratch_reg ? scratch_reg : operand0;
                  /* SCRATCH_REG will hold an address and maybe the actual
                     data.  We want it in WORD_MODE regardless of what mode it
                     was originally given to us.  */
                  scratch_reg = force_mode (word_mode, scratch_reg);
                }
              else if (flag_pic)
                scratch_reg = gen_reg_rtx (Pmode);

              if (GET_CODE (operand1) == CONST)
                {
                  /* Save away the constant part of the expression.  */
                  const_part = XEXP (XEXP (operand1, 0), 1);
                  gcc_assert (GET_CODE (const_part) == CONST_INT);

                  /* Force the function label into memory.  */
                  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
                }
              else
                {
                  /* No constant part.  */
                  const_part = NULL_RTX;

                  /* Force the function label into memory.  */
                  temp = force_const_mem (mode, operand1);
                }

              /* Get the address of the memory location.  PIC-ify it if
                 necessary.  */
              temp = XEXP (temp, 0);
              if (flag_pic)
                temp = legitimize_pic_address (temp, mode, scratch_reg);

              /* Put the address of the memory location into our destination
                 register.  */
              operands[1] = temp;
              emit_move_sequence (operands, mode, scratch_reg);

              /* Now load from the memory location into our destination
                 register.  */
              operands[1] = gen_rtx_MEM (Pmode, operands[0]);
              emit_move_sequence (operands, mode, scratch_reg);

              /* And add back in the constant part.  */
              if (const_part != NULL_RTX)
                expand_inc (operand0, const_part);

              return 1;
            }

          if (flag_pic)
            {
              rtx temp;

              if (reload_in_progress || reload_completed)
                {
                  temp = scratch_reg ? scratch_reg : operand0;
                  /* TEMP will hold an address and maybe the actual
                     data.  We want it in WORD_MODE regardless of what mode it
                     was originally given to us.  */
                  temp = force_mode (word_mode, temp);
                }
              else
                temp = gen_reg_rtx (Pmode);

              /* (const (plus (symbol) (const_int))) must be forced to
                 memory during/after reload if the const_int will not fit
                 in 14 bits.  */
              if (GET_CODE (operand1) == CONST
                  && GET_CODE (XEXP (operand1, 0)) == PLUS
                  && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
                  && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
                  && (reload_completed || reload_in_progress)
                  && flag_pic)
                {
                  rtx const_mem = force_const_mem (mode, operand1);
                  operands[1] = legitimize_pic_address (XEXP (const_mem, 0),
                                                        mode, temp);
                  operands[1] = replace_equiv_address (const_mem, operands[1]);
                  emit_move_sequence (operands, mode, temp);
                }
              else
                {
                  operands[1] = legitimize_pic_address (operand1, mode, temp);
                  if (REG_P (operand0) && REG_P (operands[1]))
                    copy_reg_pointer (operand0, operands[1]);
                  emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
                }
            }
          /* On the HPPA, references to data space are supposed to use dp,
             register 27, but showing it in the RTL inhibits various cse
             and loop optimizations.  */
          else
            {
              rtx temp, set;

              if (reload_in_progress || reload_completed)
                {
                  temp = scratch_reg ? scratch_reg : operand0;
                  /* TEMP will hold an address and maybe the actual
                     data.  We want it in WORD_MODE regardless of what mode it
                     was originally given to us.  */
                  temp = force_mode (word_mode, temp);
                }
              else
                temp = gen_reg_rtx (mode);

              /* Loading a SYMBOL_REF into a register makes that register
                 safe to be used as the base in an indexed address.

                 Don't mark hard registers though.  That loses.  */
              if (GET_CODE (operand0) == REG
                  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
                mark_reg_pointer (operand0, BITS_PER_UNIT);
              if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
                mark_reg_pointer (temp, BITS_PER_UNIT);

              if (ishighonly)
                set = gen_rtx_SET (mode, operand0, temp);
              else
                set = gen_rtx_SET (VOIDmode,
                                   operand0,
                                   gen_rtx_LO_SUM (mode, temp, operand1));

              emit_insn (gen_rtx_SET (VOIDmode,
                                      temp,
                                      gen_rtx_HIGH (mode, operand1)));
              emit_insn (set);
            }
          return 1;
        }
      else if (pa_tls_referenced_p (operand1))
        {
          rtx tmp = operand1;
          rtx addend = NULL;

          if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
            {
              addend = XEXP (XEXP (tmp, 0), 1);
              tmp = XEXP (XEXP (tmp, 0), 0);
            }

          gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
          tmp = legitimize_tls_address (tmp);
          if (addend)
            {
              tmp = gen_rtx_PLUS (mode, tmp, addend);
              tmp = force_operand (tmp, operands[0]);
            }
          operands[1] = tmp;
        }
      else if (GET_CODE (operand1) != CONST_INT
               || !cint_ok_for_move (INTVAL (operand1)))
        {
          rtx insn, temp;
          rtx op1 = operand1;
          HOST_WIDE_INT value = 0;
          HOST_WIDE_INT insv = 0;
          int insert = 0;

          if (GET_CODE (operand1) == CONST_INT)
            value = INTVAL (operand1);

          if (TARGET_64BIT
              && GET_CODE (operand1) == CONST_INT
              && HOST_BITS_PER_WIDE_INT > 32
              && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
            {
              HOST_WIDE_INT nval;

              /* Extract the low order 32 bits of the value and sign extend.
                 If the new value is the same as the original value, we can
                 use the original value as-is.  If the new value is
                 different, we use it and insert the most-significant 32 bits
                 of the original value into the final result.  */
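              /* (Added example:) for VALUE 0x123456789, the sign-extended
                 low 32 bits are 0x23456789, which differs from VALUE, so
                 VALUE becomes 0x23456789, INSV becomes 0x1, and the upper
                 32 bits are inserted separately below.  */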
              nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
                      ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
              if (value != nval)
                {
#if HOST_BITS_PER_WIDE_INT > 32
                  insv = value >= 0 ? value >> 32 : ~(~value >> 32);
#endif
                  insert = 1;
                  value = nval;
                  operand1 = GEN_INT (nval);
                }
            }

          if (reload_in_progress || reload_completed)
            temp = scratch_reg ? scratch_reg : operand0;
          else
            temp = gen_reg_rtx (mode);

          /* We don't directly split DImode constants on 32-bit targets
             because PLUS uses an 11-bit immediate and the insn sequence
             generated is not as efficient as the one using HIGH/LO_SUM.  */
          if (GET_CODE (operand1) == CONST_INT
              && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
              && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
              && !insert)
            {
              /* Directly break constant into high and low parts.  This
                 provides better optimization opportunities because various
                 passes recognize constants split with PLUS but not LO_SUM.
                 We use a 14-bit signed low part except when the addition
                 of 0x4000 to the high part might change the sign of the
                 high part.  */
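              /* (Added worked example:) for VALUE 0x1234abcd, LOW is
                 0x2bcd >= 0x2000, so HIGH becomes 0x12348000 + 0x4000
                 = 0x1234c000 and LOW is recomputed as -0x1433; the
                 constant is then materialized as HIGH plus LOW.  */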
              HOST_WIDE_INT low = value & 0x3fff;
              HOST_WIDE_INT high = value & ~ 0x3fff;

              if (low >= 0x2000)
                {
                  if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
                    high += 0x2000;
                  else
                    high += 0x4000;
                }

              low = value - high;

              emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
              operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
            }
          else
            {
              emit_insn (gen_rtx_SET (VOIDmode, temp,
                                      gen_rtx_HIGH (mode, operand1)));
              operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
            }

          insn = emit_move_insn (operands[0], operands[1]);

          /* Now insert the most significant 32 bits of the value
             into the register.  When we don't have a second register
             available, it could take up to nine instructions to load
             a 64-bit integer constant.  Prior to reload, we force
             constants that would take more than three instructions
             to load to the constant pool.  During and after reload,
             we have to handle all possible values.  */
          if (insert)
            {
              /* Use a HIGH/LO_SUM/INSV sequence if we have a second
                 register and the value to be inserted is outside the
                 range that can be loaded with three depdi instructions.  */
              if (temp != operand0 && (insv >= 16384 || insv < -16384))
                {
                  operand1 = GEN_INT (insv);

                  emit_insn (gen_rtx_SET (VOIDmode, temp,
                                          gen_rtx_HIGH (mode, operand1)));
                  emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
                  emit_insn (gen_insv (operand0, GEN_INT (32),
                                       const0_rtx, temp));
                }
              else
                {
                  int len = 5, pos = 27;

                  /* Insert the bits using the depdi instruction.  */
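                  /* (Added note:) each depdi below deposits a sign-extended
                     5-bit immediate; LEN is widened while the bits of INSV
                     above the field match its sign bit, since the insn's
                     sign extension reproduces those bits for free.  */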
                  while (pos >= 0)
                    {
                      HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
                      HOST_WIDE_INT sign = v5 < 0;

                      /* Left extend the insertion.  */
                      insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
                      while (pos > 0 && (insv & 1) == sign)
                        {
                          insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
                          len += 1;
                          pos -= 1;
                        }

                      emit_insn (gen_insv (operand0, GEN_INT (len),
                                           GEN_INT (pos), GEN_INT (v5)));

                      len = pos > 0 && pos < 5 ? pos : 5;
                      pos -= len;
                    }
                }
            }

          set_unique_reg_note (insn, REG_EQUAL, op1);

          return 1;
        }
    }
  /* Now have insn-emit do whatever it normally does.  */
  return 0;
}

/* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
   it will need a link/runtime reloc).  */
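/* (Added illustration:) the static initializer in "static int x;
   static int *p = &x;" contains an ADDR_EXPR and so needs a reloc,
   while a purely arithmetic initializer such as "static int i = 42;"
   does not.  */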

int
reloc_needed (tree exp)
{
  int reloc = 0;

  switch (TREE_CODE (exp))
    {
    case ADDR_EXPR:
      return 1;

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      reloc = reloc_needed (TREE_OPERAND (exp, 0));
      reloc |= reloc_needed (TREE_OPERAND (exp, 1));
      break;

    CASE_CONVERT:
    case NON_LVALUE_EXPR:
      reloc = reloc_needed (TREE_OPERAND (exp, 0));
      break;

    case CONSTRUCTOR:
      {
        tree value;
        unsigned HOST_WIDE_INT ix;

        FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
          if (value)
            reloc |= reloc_needed (value);
      }
      break;

    case ERROR_MARK:
      break;

    default:
      break;
    }
  return reloc;
}

/* Does operand (which is a symbolic_operand) live in text space?
   If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
   will be true.  */

int
read_only_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  if (GET_CODE (operand) == CONST)
    operand = XEXP (XEXP (operand, 0), 0);
  if (flag_pic)
    {
      if (GET_CODE (operand) == SYMBOL_REF)
        return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
    }
  else
    {
      if (GET_CODE (operand) == SYMBOL_REF)
        return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
    }
  return 1;
}

/* Return the best assembler insn template
   for moving operands[1] into operands[0] as a fullword.  */
const char *
singlemove_string (rtx *operands)
{
  HOST_WIDE_INT intval;

  if (GET_CODE (operands[0]) == MEM)
    return "stw %r1,%0";
  if (GET_CODE (operands[1]) == MEM)
    return "ldw %1,%0";
  if (GET_CODE (operands[1]) == CONST_DOUBLE)
    {
      long i;
      REAL_VALUE_TYPE d;

      gcc_assert (GET_MODE (operands[1]) == SFmode);

      /* Translate the CONST_DOUBLE to a CONST_INT with the same target
         bit pattern.  */
      REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
      REAL_VALUE_TO_TARGET_SINGLE (d, i);

      operands[1] = GEN_INT (i);
      /* Fall through to CONST_INT case.  */
    }
  if (GET_CODE (operands[1]) == CONST_INT)
    {
      intval = INTVAL (operands[1]);

      if (VAL_14_BITS_P (intval))
        return "ldi %1,%0";
      else if ((intval & 0x7ff) == 0)
        return "ldil L'%1,%0";
      else if (zdepi_cint_p (intval))
        return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
      else
        return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
    }
  return "copy %1,%0";
}
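
/* (Added examples:) INTVAL 5 fits in 14 bits and yields "ldi 5,%0";
   0x12345800 has its low 11 bits clear and yields "ldil L'...";
   constants that zdepi can build use the depwi,z form; anything else
   takes the two-insn ldil/ldo sequence.  */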

/* Compute position (in OP[1]) and width (in OP[2])
   useful for copying IMM to a register using the zdepi
   instruction.  Store the immediate value to insert in OP[0].  */
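/* (Added worked example:) IMM = 0x3e0 (five 1-bits starting at bit 5)
   gives lsb = 5 and a 5-bit field whose sign bit is set, so the
   results are OP[0] = -1, OP[1] = 26, OP[2] = 5.  */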
static void
compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
{
  int lsb, len;

  /* Find the least significant set bit in IMM.  */
  for (lsb = 0; lsb < 32; lsb++)
    {
      if ((imm & 1) != 0)
        break;
      imm >>= 1;
    }

  /* Choose variants based on *sign* of the 5-bit field.  */
  if ((imm & 0x10) == 0)
    len = (lsb <= 28) ? 4 : 32 - lsb;
  else
    {
      /* Find the width of the bitstring in IMM.  */
      for (len = 5; len < 32 - lsb; len++)
        {
          if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
            break;
        }

      /* Sign extend IMM as a 5-bit value.  */
      imm = (imm & 0xf) - 0x10;
    }

  op[0] = imm;
  op[1] = 31 - lsb;
  op[2] = len;
}

/* Compute position (in OP[1]) and width (in OP[2])
   useful for copying IMM to a register using the depdi,z
   instruction.  Store the immediate value to insert in OP[0].  */
void
compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
{
  int lsb, len, maxlen;

  maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);

  /* Find the least significant set bit in IMM.  */
  for (lsb = 0; lsb < maxlen; lsb++)
    {
      if ((imm & 1) != 0)
        break;
      imm >>= 1;
    }

  /* Choose variants based on *sign* of the 5-bit field.  */
  if ((imm & 0x10) == 0)
    len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
  else
    {
      /* Find the width of the bitstring in IMM.  */
      for (len = 5; len < maxlen - lsb; len++)
        {
          if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
            break;
        }

      /* Extend length if host is narrow and IMM is negative.  */
      if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
        len += 32;

      /* Sign extend IMM as a 5-bit value.  */
      imm = (imm & 0xf) - 0x10;
    }

  op[0] = imm;
  op[1] = 63 - lsb;
  op[2] = len;
}

/* Output assembler code to perform a doubleword move insn
   with operands OPERANDS.  */

const char *
output_move_double (rtx *operands)
{
  enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
  rtx latehalf[2];
  rtx addreg0 = 0, addreg1 = 0;

  /* First classify both operands.  */

  if (REG_P (operands[0]))
    optype0 = REGOP;
  else if (offsettable_memref_p (operands[0]))
    optype0 = OFFSOP;
  else if (GET_CODE (operands[0]) == MEM)
    optype0 = MEMOP;
  else
    optype0 = RNDOP;

  if (REG_P (operands[1]))
    optype1 = REGOP;
  else if (CONSTANT_P (operands[1]))
    optype1 = CNSTOP;
  else if (offsettable_memref_p (operands[1]))
    optype1 = OFFSOP;
  else if (GET_CODE (operands[1]) == MEM)
    optype1 = MEMOP;
  else
    optype1 = RNDOP;

  /* Check for cases that the operand constraints are not
     supposed to allow.  */
  gcc_assert (optype0 == REGOP || optype1 == REGOP);

  /* Handle copies between general and floating registers.  */

  if (optype0 == REGOP && optype1 == REGOP
      && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
    {
      if (FP_REG_P (operands[0]))
        {
          output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
          output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
          return "{fldds|fldd} -16(%%sp),%0";
        }
      else
        {
          output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
          output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
          return "{ldws|ldw} -12(%%sp),%R0";
        }
    }

  /* Handle auto decrementing and incrementing loads and stores
     specifically, since the structure of the function doesn't work
     for them without major modification.  Do it better when we teach
     this port about the general inc/dec addressing of the PA.
     (This was written by tege.  Chide him if it doesn't work.)  */

  if (optype0 == MEMOP)
    {
      /* We have to output the address syntax ourselves, since print_operand
         doesn't deal with the addresses we want to use.  Fix this later.  */

      rtx addr = XEXP (operands[0], 0);
      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
        {
          rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);

          operands[0] = XEXP (addr, 0);
          gcc_assert (GET_CODE (operands[1]) == REG
                      && GET_CODE (operands[0]) == REG);

          gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));

          /* No overlap between high target register and address
             register.  (We do this in a non-obvious way to
             save a register file writeback)  */
          if (GET_CODE (addr) == POST_INC)
            return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
          return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
        }
      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
        {
          rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);

          operands[0] = XEXP (addr, 0);
          gcc_assert (GET_CODE (operands[1]) == REG
                      && GET_CODE (operands[0]) == REG);

          gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
          /* No overlap between high target register and address
             register.  (We do this in a non-obvious way to save a
             register file writeback)  */
          if (GET_CODE (addr) == PRE_INC)
            return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
          return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
        }
    }
  if (optype1 == MEMOP)
    {
      /* We have to output the address syntax ourselves, since print_operand
         doesn't deal with the addresses we want to use.  Fix this later.  */

      rtx addr = XEXP (operands[1], 0);
      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
        {
          rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);

          operands[1] = XEXP (addr, 0);
          gcc_assert (GET_CODE (operands[0]) == REG
                      && GET_CODE (operands[1]) == REG);

          if (!reg_overlap_mentioned_p (high_reg, addr))
            {
              /* No overlap between high target register and address
                 register.  (We do this in a non-obvious way to
                 save a register file writeback)  */
              if (GET_CODE (addr) == POST_INC)
                return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
              return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
            }
          else
            {
              /* This is an undefined situation.  We should load into the
                 address register *and* update that register.  Probably
                 we don't need to handle this at all.  */
              if (GET_CODE (addr) == POST_INC)
                return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
              return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
            }
        }
      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
        {
          rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);

          operands[1] = XEXP (addr, 0);
          gcc_assert (GET_CODE (operands[0]) == REG
                      && GET_CODE (operands[1]) == REG);

          if (!reg_overlap_mentioned_p (high_reg, addr))
            {
              /* No overlap between high target register and address
                 register.  (We do this in a non-obvious way to
                 save a register file writeback)  */
              if (GET_CODE (addr) == PRE_INC)
                return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
              return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
            }
          else
            {
              /* This is an undefined situation.  We should load into the
                 address register *and* update that register.  Probably
                 we don't need to handle this at all.  */
              if (GET_CODE (addr) == PRE_INC)
                return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
              return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
            }
        }
      else if (GET_CODE (addr) == PLUS
               && GET_CODE (XEXP (addr, 0)) == MULT)
        {
          rtx xoperands[4];
          rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);

          if (!reg_overlap_mentioned_p (high_reg, addr))
            {
              xoperands[0] = high_reg;
              xoperands[1] = XEXP (addr, 1);
              xoperands[2] = XEXP (XEXP (addr, 0), 0);
              xoperands[3] = XEXP (XEXP (addr, 0), 1);
              output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
                               xoperands);
              return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
            }
          else
            {
              xoperands[0] = high_reg;
              xoperands[1] = XEXP (addr, 1);
              xoperands[2] = XEXP (XEXP (addr, 0), 0);
              xoperands[3] = XEXP (XEXP (addr, 0), 1);
              output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
                               xoperands);
              return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
            }
        }
    }

  /* If an operand is an unoffsettable memory ref, find a register
     we can increment temporarily to make it refer to the second word.  */

  if (optype0 == MEMOP)
    addreg0 = find_addr_reg (XEXP (operands[0], 0));

  if (optype1 == MEMOP)
    addreg1 = find_addr_reg (XEXP (operands[1], 0));

  /* Ok, we can do one word at a time.
     Normally we do the low-numbered word first.

     In either case, set up in LATEHALF the operands to use
     for the high-numbered word and in some cases alter the
     operands in OPERANDS to be suitable for the low-numbered word.  */

  if (optype0 == REGOP)
    latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
  else if (optype0 == OFFSOP)
    latehalf[0] = adjust_address (operands[0], SImode, 4);
  else
    latehalf[0] = operands[0];

  if (optype1 == REGOP)
    latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
  else if (optype1 == OFFSOP)
    latehalf[1] = adjust_address (operands[1], SImode, 4);
  else if (optype1 == CNSTOP)
    split_double (operands[1], &operands[1], &latehalf[1]);
  else
    latehalf[1] = operands[1];

  /* If the first move would clobber the source of the second one,
     do them in the other order.

     This can happen in two cases:

        mem -> register where the first half of the destination register
        is the same register used in the memory's address.  Reload
        can create such insns.

        mem in this case will be either register indirect or register
        indirect plus a valid offset.

        register -> register move where REGNO(dst) == REGNO(src + 1)
        someone (Tim/Tege?) claimed this can happen for parameter loads.

     Handle mem -> register case first.  */
  if (optype0 == REGOP
      && (optype1 == MEMOP || optype1 == OFFSOP)
      && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
                            operands[1], 0))
    {
      /* Do the late half first.  */
      if (addreg1)
        output_asm_insn ("ldo 4(%0),%0", &addreg1);
      output_asm_insn (singlemove_string (latehalf), latehalf);

      /* Then clobber.  */
      if (addreg1)
        output_asm_insn ("ldo -4(%0),%0", &addreg1);
      return singlemove_string (operands);
    }

  /* Now handle register -> register case.  */
  if (optype0 == REGOP && optype1 == REGOP
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    {
      output_asm_insn (singlemove_string (latehalf), latehalf);
      return singlemove_string (operands);
    }

  /* Normal case: do the two words, low-numbered first.  */

  output_asm_insn (singlemove_string (operands), operands);

  /* Make any unoffsettable addresses point at high-numbered word.  */
  if (addreg0)
    output_asm_insn ("ldo 4(%0),%0", &addreg0);
  if (addreg1)
    output_asm_insn ("ldo 4(%0),%0", &addreg1);

  /* Do that word.  */
  output_asm_insn (singlemove_string (latehalf), latehalf);

  /* Undo the adds we just did.  */
  if (addreg0)
    output_asm_insn ("ldo -4(%0),%0", &addreg0);
  if (addreg1)
    output_asm_insn ("ldo -4(%0),%0", &addreg1);

  return "";
}

const char *
output_fp_move_double (rtx *operands)
{
  if (FP_REG_P (operands[0]))
    {
      if (FP_REG_P (operands[1])
          || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
        output_asm_insn ("fcpy,dbl %f1,%0", operands);
      else
        output_asm_insn ("fldd%F1 %1,%0", operands);
    }
  else if (FP_REG_P (operands[1]))
    {
      output_asm_insn ("fstd%F0 %1,%0", operands);
    }
  else
    {
      rtx xoperands[2];

      gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));

      /* This is a pain.  You have to be prepared to deal with an
         arbitrary address here including pre/post increment/decrement.

         So avoid this in the MD.  */
      gcc_assert (GET_CODE (operands[0]) == REG);

      xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
      xoperands[0] = operands[0];
      output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
    }
  return "";
}

/* Return a REG that occurs in ADDR with coefficient 1.
   ADDR can be effectively incremented by incrementing REG.  */

static rtx
find_addr_reg (rtx addr)
{
  while (GET_CODE (addr) == PLUS)
    {
      if (GET_CODE (XEXP (addr, 0)) == REG)
        addr = XEXP (addr, 0);
      else if (GET_CODE (XEXP (addr, 1)) == REG)
        addr = XEXP (addr, 1);
      else if (CONSTANT_P (XEXP (addr, 0)))
        addr = XEXP (addr, 1);
      else if (CONSTANT_P (XEXP (addr, 1)))
        addr = XEXP (addr, 0);
      else
        gcc_unreachable ();
    }
  gcc_assert (GET_CODE (addr) == REG);
  return addr;
}

/* Emit code to perform a block move.

   OPERANDS[0] is the destination pointer as a REG, clobbered.
   OPERANDS[1] is the source pointer as a REG, clobbered.
   OPERANDS[2] is a register for temporary storage.
   OPERANDS[3] is a register for temporary storage.
   OPERANDS[4] is the size as a CONST_INT
   OPERANDS[5] is the alignment safe to use, as a CONST_INT.
   OPERANDS[6] is another temporary register.  */

const char *
output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
{
  int align = INTVAL (operands[5]);
  unsigned long n_bytes = INTVAL (operands[4]);

  /* We can't move more than a word at a time because the PA
     has no integer move insns wider than a word.  (Could use fp mem ops?)  */
  if (align > (TARGET_64BIT ? 8 : 4))
    align = (TARGET_64BIT ? 8 : 4);

  /* Note that we know each loop below will execute at least twice
     (else we would have open-coded the copy).  */
  switch (align)
    {
      case 8:
        /* Pre-adjust the loop counter.  */
        operands[4] = GEN_INT (n_bytes - 16);
        output_asm_insn ("ldi %4,%2", operands);

        /* Copying loop.  */
        output_asm_insn ("ldd,ma 8(%1),%3", operands);
        output_asm_insn ("ldd,ma 8(%1),%6", operands);
        output_asm_insn ("std,ma %3,8(%0)", operands);
        output_asm_insn ("addib,>= -16,%2,.-12", operands);
        output_asm_insn ("std,ma %6,8(%0)", operands);

        /* Handle the residual.  There could be up to 7 bytes of
           residual to copy!  */
        if (n_bytes % 16 != 0)
          {
            operands[4] = GEN_INT (n_bytes % 8);
            if (n_bytes % 16 >= 8)
              output_asm_insn ("ldd,ma 8(%1),%3", operands);
            if (n_bytes % 8 != 0)
              output_asm_insn ("ldd 0(%1),%6", operands);
            if (n_bytes % 16 >= 8)
              output_asm_insn ("std,ma %3,8(%0)", operands);
            if (n_bytes % 8 != 0)
              output_asm_insn ("stdby,e %6,%4(%0)", operands);
          }
        return "";

      case 4:
        /* Pre-adjust the loop counter.  */
        operands[4] = GEN_INT (n_bytes - 8);
        output_asm_insn ("ldi %4,%2", operands);

        /* Copying loop.  */
        output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
        output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
        output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
        output_asm_insn ("addib,>= -8,%2,.-12", operands);
        output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);

        /* Handle the residual.  There could be up to 7 bytes of
           residual to copy!  */
        if (n_bytes % 8 != 0)
          {
            operands[4] = GEN_INT (n_bytes % 4);
            if (n_bytes % 8 >= 4)
              output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
            if (n_bytes % 4 != 0)
              output_asm_insn ("ldw 0(%1),%6", operands);
            if (n_bytes % 8 >= 4)
              output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
            if (n_bytes % 4 != 0)
              output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
          }
        return "";

      case 2:
        /* Pre-adjust the loop counter.  */
        operands[4] = GEN_INT (n_bytes - 4);
        output_asm_insn ("ldi %4,%2", operands);

        /* Copying loop.  */
        output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
        output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
        output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
        output_asm_insn ("addib,>= -4,%2,.-12", operands);
        output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);

        /* Handle the residual.  */
        if (n_bytes % 4 != 0)
          {
            if (n_bytes % 4 >= 2)
              output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
            if (n_bytes % 2 != 0)
              output_asm_insn ("ldb 0(%1),%6", operands);
            if (n_bytes % 4 >= 2)
              output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
            if (n_bytes % 2 != 0)
              output_asm_insn ("stb %6,0(%0)", operands);
          }
        return "";

      case 1:
        /* Pre-adjust the loop counter.  */
        operands[4] = GEN_INT (n_bytes - 2);
        output_asm_insn ("ldi %4,%2", operands);

        /* Copying loop.  */
        output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
        output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
        output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
        output_asm_insn ("addib,>= -2,%2,.-12", operands);
        output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);

        /* Handle the residual.  */
        if (n_bytes % 2 != 0)
          {
            output_asm_insn ("ldb 0(%1),%3", operands);
            output_asm_insn ("stb %3,0(%0)", operands);
          }
        return "";

      default:
        gcc_unreachable ();
    }
}

/* Count the number of insns necessary to handle this block move.

   Basic structure is the same as output_block_move, except that we
   count insns rather than emit them.  */
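/* (Added worked example:) with align 4 and n_bytes 22, the loop costs
   6 insns; 22 % 8 = 6 >= 4 adds two more and 22 % 4 = 2 adds another
   two, giving 10 insns, i.e. 40 bytes.  */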

static int
compute_movmem_length (rtx insn)
{
  rtx pat = PATTERN (insn);
  unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
  unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
  unsigned int n_insns = 0;

  /* We can't move more than a word at a time because the PA
     has no integer move insns wider than a word.  (Could use fp mem ops?)  */
  if (align > (TARGET_64BIT ? 8 : 4))
    align = (TARGET_64BIT ? 8 : 4);

  /* The basic copying loop.  */
  n_insns = 6;

  /* Residuals.  */
  if (n_bytes % (2 * align) != 0)
    {
      if ((n_bytes % (2 * align)) >= align)
        n_insns += 2;

      if ((n_bytes % align) != 0)
        n_insns += 2;
    }

  /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
  return n_insns * 4;
}

/* Emit code to perform a block clear.

   OPERANDS[0] is the destination pointer as a REG, clobbered.
   OPERANDS[1] is a register for temporary storage.
   OPERANDS[2] is the size as a CONST_INT
   OPERANDS[3] is the alignment safe to use, as a CONST_INT.  */

const char *
output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
{
  int align = INTVAL (operands[3]);
  unsigned long n_bytes = INTVAL (operands[2]);

  /* We can't clear more than a word at a time because the PA
     has no integer move insns wider than a word.  */
  if (align > (TARGET_64BIT ? 8 : 4))
    align = (TARGET_64BIT ? 8 : 4);
2827
 
2828
  /* Note that we know each loop below will execute at least twice
2829
     (else we would have open-coded the copy).  */
2830
  switch (align)
2831
    {
2832
      case 8:
2833
        /* Pre-adjust the loop counter.  */
2834
        operands[2] = GEN_INT (n_bytes - 16);
2835
        output_asm_insn ("ldi %2,%1", operands);
2836
 
2837
        /* Loop.  */
2838
        output_asm_insn ("std,ma %%r0,8(%0)", operands);
2839
        output_asm_insn ("addib,>= -16,%1,.-4", operands);
2840
        output_asm_insn ("std,ma %%r0,8(%0)", operands);
2841
 
2842
        /* Handle the residual.  There could be up to 7 bytes of
2843
           residual to copy!  */
2844
        if (n_bytes % 16 != 0)
2845
          {
2846
            operands[2] = GEN_INT (n_bytes % 8);
2847
            if (n_bytes % 16 >= 8)
2848
              output_asm_insn ("std,ma %%r0,8(%0)", operands);
2849
            if (n_bytes % 8 != 0)
2850
              output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
2851
          }
2852
        return "";
2853
 
2854
      case 4:
2855
        /* Pre-adjust the loop counter.  */
2856
        operands[2] = GEN_INT (n_bytes - 8);
2857
        output_asm_insn ("ldi %2,%1", operands);
2858
 
2859
        /* Loop.  */
2860
        output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2861
        output_asm_insn ("addib,>= -8,%1,.-4", operands);
2862
        output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2863
 
2864
        /* Handle the residual.  There could be up to 7 bytes of
2865
           residual to copy!  */
2866
        if (n_bytes % 8 != 0)
2867
          {
2868
            operands[2] = GEN_INT (n_bytes % 4);
2869
            if (n_bytes % 8 >= 4)
2870
              output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2871
            if (n_bytes % 4 != 0)
2872
              output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
2873
          }
2874
        return "";
2875
 
2876
      case 2:
2877
        /* Pre-adjust the loop counter.  */
2878
        operands[2] = GEN_INT (n_bytes - 4);
2879
        output_asm_insn ("ldi %2,%1", operands);
2880
 
2881
        /* Loop.  */
2882
        output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2883
        output_asm_insn ("addib,>= -4,%1,.-4", operands);
2884
        output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2885
 
2886
        /* Handle the residual.  */
2887
        if (n_bytes % 4 != 0)
2888
          {
2889
            if (n_bytes % 4 >= 2)
2890
              output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2891
            if (n_bytes % 2 != 0)
2892
              output_asm_insn ("stb %%r0,0(%0)", operands);
2893
          }
2894
        return "";
2895
 
2896
      case 1:
2897
        /* Pre-adjust the loop counter.  */
2898
        operands[2] = GEN_INT (n_bytes - 2);
2899
        output_asm_insn ("ldi %2,%1", operands);
2900
 
2901
        /* Loop.  */
2902
        output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2903
        output_asm_insn ("addib,>= -2,%1,.-4", operands);
2904
        output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2905
 
2906
        /* Handle the residual.  */
2907
        if (n_bytes % 2 != 0)
2908
          output_asm_insn ("stb %%r0,0(%0)", operands);
2909
 
2910
        return "";
2911
 
2912
      default:
2913
        gcc_unreachable ();
2914
    }
2915
}
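
/* Editor's note: a hedged C model (illustration only, not part of the
   original pa.c) of the pre-adjusted counter used by the clear loops
   above, shown for the align == 4 case with n_bytes a multiple of 8.
   The counter starts at n_bytes - 8 and "addib,>= -8" keeps looping
   while it stays non-negative, so the loop body always runs at least
   once and clears at least 8 bytes, matching the "executes at least
   twice" assumption on the two stores.  */
static void
block_clear_model_4 (unsigned int *dst, long n_bytes)
{
  long counter = n_bytes - 8;   /* ldi n_bytes-8,%1 */
  do
    {
      *dst++ = 0;               /* {stws|stw},ma %r0,4(%0) */
      *dst++ = 0;               /* second store, in the branch's shadow */
      counter -= 8;             /* addib,>= -8,%1,.-4 */
    }
  while (counter >= 0);
}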
 
/* Count the number of insns necessary to handle this block clear.
 
   Basic structure is the same as output_block_clear, except that we
   count insns rather than emit them.  */
 
static int
compute_clrmem_length (rtx insn)
{
  rtx pat = PATTERN (insn);
  unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
  unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
  unsigned int n_insns = 0;
 
  /* We can't clear more than a word at a time because the PA
     has no integer move insns wider than a word.  */
  if (align > (TARGET_64BIT ? 8 : 4))
    align = (TARGET_64BIT ? 8 : 4);
 
  /* The basic loop.  */
  n_insns = 4;
 
  /* Residuals.  */
  if (n_bytes % (2 * align) != 0)
    {
      if ((n_bytes % (2 * align)) >= align)
        n_insns++;
 
      if ((n_bytes % align) != 0)
        n_insns++;
    }
 
  /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
  return n_insns * 4;
}
 
 
const char *
output_and (rtx *operands)
{
  if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
    {
      unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
      int ls0, ls1, ms0, p, len;
 
      for (ls0 = 0; ls0 < 32; ls0++)
        if ((mask & (1 << ls0)) == 0)
          break;
 
      for (ls1 = ls0; ls1 < 32; ls1++)
        if ((mask & (1 << ls1)) != 0)
          break;
 
      for (ms0 = ls1; ms0 < 32; ms0++)
        if ((mask & (1 << ms0)) == 0)
          break;
 
      gcc_assert (ms0 == 32);
 
      if (ls1 == 32)
        {
          len = ls0;
 
          gcc_assert (len);
 
          operands[2] = GEN_INT (len);
          return "{extru|extrw,u} %1,31,%2,%0";
        }
      else
        {
          /* We could use this `depi' for the case above as well, but `depi'
             requires one more register file access than an `extru'.  */
 
          p = 31 - ls0;
          len = ls1 - ls0;
 
          operands[2] = GEN_INT (p);
          operands[3] = GEN_INT (len);
          return "{depi|depwi} 0,%2,%3,%0";
        }
    }
  else
    return "and %1,%2,%0";
}
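
/* Editor's note: an illustrative standalone classifier (not part of the
   original pa.c) mirroring the ls0/ls1/ms0 scan above.  ls0 is the first
   clear bit from the bottom, ls1 the next set bit, ms0 the next clear
   bit; output_and only handles masks where the scan consumes all 32
   bits.  ls1 == 32 means a low-order run of ones (the extract case);
   otherwise the mask has a single clear field (the deposit-zeros case).  */
static int
and_mask_kind (unsigned int mask)  /* 0: general and, 1: extru, 2: depi */
{
  int ls0, ls1, ms0;

  for (ls0 = 0; ls0 < 32; ls0++)
    if ((mask & (1u << ls0)) == 0)
      break;
  for (ls1 = ls0; ls1 < 32; ls1++)
    if ((mask & (1u << ls1)) != 0)
      break;
  for (ms0 = ls1; ms0 < 32; ms0++)
    if ((mask & (1u << ms0)) == 0)
      break;
  if (ms0 != 32)
    return 0;                /* not a shape these patterns handle */
  return ls1 == 32 ? 1 : 2;  /* 0x000000ff -> 1, 0xffff00ff -> 2 */
}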
 
/* Return a string to perform a bitwise-and of operands[1] with operands[2]
   storing the result in operands[0].  */
const char *
output_64bit_and (rtx *operands)
{
  if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
    {
      unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
      int ls0, ls1, ms0, p, len;
 
      for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
        if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
          break;
 
      for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
        if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
          break;
 
      for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
        if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
          break;
 
      gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
 
      if (ls1 == HOST_BITS_PER_WIDE_INT)
        {
          len = ls0;
 
          gcc_assert (len);
 
          operands[2] = GEN_INT (len);
          return "extrd,u %1,63,%2,%0";
        }
      else
        {
          /* We could use this `depi' for the case above as well, but `depi'
             requires one more register file access than an `extru'.  */
 
          p = 63 - ls0;
          len = ls1 - ls0;
 
          operands[2] = GEN_INT (p);
          operands[3] = GEN_INT (len);
          return "depdi 0,%2,%3,%0";
        }
    }
  else
    return "and %1,%2,%0";
}
 
const char *
output_ior (rtx *operands)
{
  unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
  int bs0, bs1, p, len;
 
  if (INTVAL (operands[2]) == 0)
    return "copy %1,%0";
 
  for (bs0 = 0; bs0 < 32; bs0++)
    if ((mask & (1 << bs0)) != 0)
      break;
 
  for (bs1 = bs0; bs1 < 32; bs1++)
    if ((mask & (1 << bs1)) == 0)
      break;
 
  gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
 
  p = 31 - bs0;
  len = bs1 - bs0;
 
  operands[2] = GEN_INT (p);
  operands[3] = GEN_INT (len);
  return "{depi|depwi} -1,%2,%3,%0";
}
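
/* Editor's note (worked example, not part of the original pa.c): for
   mask == 0x00000ff0, bs0 == 4 and bs1 == 12, so p == 31 - 4 == 27 and
   len == 8, and output_ior returns "{depi|depwi} -1,%2,%3,%0", i.e.
   "depi -1,27,8,%0": deposit eight 1-bits whose rightmost bit lands at
   position 27 (PA bit numbering counts from the most significant bit).  */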
 
/* Return a string to perform a bitwise inclusive-or of operands[1] with
   operands[2] storing the result in operands[0].  */
const char *
output_64bit_ior (rtx *operands)
{
  unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
  int bs0, bs1, p, len;
 
  if (INTVAL (operands[2]) == 0)
    return "copy %1,%0";
 
  for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
      break;
 
  for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
      break;
 
  gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
              || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
 
  p = 63 - bs0;
  len = bs1 - bs0;
 
  operands[2] = GEN_INT (p);
  operands[3] = GEN_INT (len);
  return "depdi -1,%2,%3,%0";
}
 
/* Target hook for assembling integer objects.  This code handles
   aligned SI and DI integers specially since function references
   must be preceded by P%.  */
 
static bool
pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  if (size == UNITS_PER_WORD
      && aligned_p
      && function_label_operand (x, VOIDmode))
    {
      fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
      output_addr_const (asm_out_file, x);
      fputc ('\n', asm_out_file);
      return true;
    }
  return default_assemble_integer (x, size, aligned_p);
}
 
/* Output an ascii string.  */
void
output_ascii (FILE *file, const char *p, int size)
{
  int i;
  int chars_output;
  unsigned char partial_output[16];     /* Max space 4 chars can occupy.  */
 
  /* The HP assembler can only take strings of 256 characters at one
     time.  This is a limitation on input line length, *not* the
     length of the string.  Sigh.  Even worse, it seems that the
     restriction is in number of input characters (see \xnn &
     \whatever).  So we have to do this very carefully.  */
 
  fputs ("\t.STRING \"", file);
 
  chars_output = 0;
  for (i = 0; i < size; i += 4)
    {
      int co = 0;
      int io = 0;
      for (io = 0, co = 0; io < MIN (4, size - i); io++)
        {
          register unsigned int c = (unsigned char) p[i + io];
 
          if (c == '\"' || c == '\\')
            partial_output[co++] = '\\';
          if (c >= ' ' && c < 0177)
            partial_output[co++] = c;
          else
            {
              unsigned int hexd;
              partial_output[co++] = '\\';
              partial_output[co++] = 'x';
              hexd =  c  / 16 - 0 + '0';
              if (hexd > '9')
                hexd -= '9' - 'a' + 1;
              partial_output[co++] = hexd;
              hexd =  c % 16 - 0 + '0';
              if (hexd > '9')
                hexd -= '9' - 'a' + 1;
              partial_output[co++] = hexd;
            }
        }
      if (chars_output + co > 243)
        {
          fputs ("\"\n\t.STRING \"", file);
          chars_output = 0;
        }
      fwrite (partial_output, 1, (size_t) co, file);
      chars_output += co;
      co = 0;
    }
  fputs ("\"\n", file);
}
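
/* Editor's note: a minimal sketch (not part of the original pa.c) of the
   nibble-to-hex mapping used by output_ascii above.  '0' + nibble lands
   in '0'..'9' for values 0-9; for 10-15 it overshoots '9' and is pulled
   down into 'a'..'f' by subtracting '9' - 'a' + 1.  */
static unsigned int
nibble_to_hex (unsigned int nibble)  /* 0 <= nibble <= 15 */
{
  unsigned int hexd = nibble + '0';
  if (hexd > '9')
    hexd -= '9' - 'a' + 1;
  return hexd;
}
/* nibble_to_hex (0x1) == '1' and nibble_to_hex (0xf) == 'f', so byte
   0x1f is emitted as "\x1f".  */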
 
/* Try to rewrite floating point comparisons & branches to avoid
   useless add,tr insns.
 
   CHECK_NOTES is nonzero if we should examine REG_DEAD notes
   to see if FPCC is dead.  CHECK_NOTES is nonzero for the
   first attempt to remove useless add,tr insns.  It is zero
   for the second pass as reorg sometimes leaves bogus REG_DEAD
   notes lying around.
 
   When CHECK_NOTES is zero we can only eliminate add,tr insns
   when there's a 1:1 correspondence between fcmp and ftest/fbranch
   instructions.  */
static void
remove_useless_addtr_insns (int check_notes)
{
  rtx insn;
  static int pass = 0;
 
  /* This is fairly cheap, so always run it when optimizing.  */
  if (optimize > 0)
    {
      int fcmp_count = 0;
      int fbranch_count = 0;
 
      /* Walk all the insns in this function looking for fcmp & fbranch
         instructions.  Keep track of how many of each we find.  */
      for (insn = get_insns (); insn; insn = next_insn (insn))
        {
          rtx tmp;
 
          /* Ignore anything that isn't an INSN or a JUMP_INSN.  */
          if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
            continue;
 
          tmp = PATTERN (insn);
 
          /* It must be a set.  */
          if (GET_CODE (tmp) != SET)
            continue;
 
          /* If the destination is CCFP, then we've found an fcmp insn.  */
          tmp = SET_DEST (tmp);
          if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
            {
              fcmp_count++;
              continue;
            }
 
          tmp = PATTERN (insn);
          /* If this is an fbranch instruction, bump the fbranch counter.  */
          if (GET_CODE (tmp) == SET
              && SET_DEST (tmp) == pc_rtx
              && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
              && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
              && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
              && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
            {
              fbranch_count++;
              continue;
            }
        }
 
 
      /* Find all floating point compare + branch insns.  If possible,
         reverse the comparison & the branch to avoid add,tr insns.  */
      for (insn = get_insns (); insn; insn = next_insn (insn))
        {
          rtx tmp, next;
 
          /* Ignore anything that isn't an INSN.  */
          if (GET_CODE (insn) != INSN)
            continue;
 
          tmp = PATTERN (insn);
 
          /* It must be a set.  */
          if (GET_CODE (tmp) != SET)
            continue;
 
          /* The destination must be CCFP, which is register zero.  */
          tmp = SET_DEST (tmp);
          if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
            continue;
 
          /* INSN should be a set of CCFP.
 
             See if the result of this insn is used in a reversed FP
             conditional branch.  If so, reverse our condition and
             the branch.  Doing so avoids useless add,tr insns.  */
          next = next_insn (insn);
          while (next)
            {
              /* Jumps, calls and labels stop our search.  */
              if (GET_CODE (next) == JUMP_INSN
                  || GET_CODE (next) == CALL_INSN
                  || GET_CODE (next) == CODE_LABEL)
                break;
 
              /* As does another fcmp insn.  */
              if (GET_CODE (next) == INSN
                  && GET_CODE (PATTERN (next)) == SET
                  && GET_CODE (SET_DEST (PATTERN (next))) == REG
                  && REGNO (SET_DEST (PATTERN (next))) == 0)
                break;
 
              next = next_insn (next);
            }
 
          /* Is NEXT_INSN a branch?  */
          if (next
              && GET_CODE (next) == JUMP_INSN)
            {
              rtx pattern = PATTERN (next);
 
              /* If it is a reversed fp conditional branch (e.g. uses add,tr)
                 and CCFP dies, then reverse our conditional and the branch
                 to avoid the add,tr.  */
              if (GET_CODE (pattern) == SET
                  && SET_DEST (pattern) == pc_rtx
                  && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
                  && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
                  && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
                  && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
                  && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
                  && (fcmp_count == fbranch_count
                      || (check_notes
                          && find_regno_note (next, REG_DEAD, 0))))
                {
                  /* Reverse the branch.  */
                  tmp = XEXP (SET_SRC (pattern), 1);
                  XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
                  XEXP (SET_SRC (pattern), 2) = tmp;
                  INSN_CODE (next) = -1;
 
                  /* Reverse our condition.  */
                  tmp = PATTERN (insn);
                  PUT_CODE (XEXP (tmp, 1),
                            (reverse_condition_maybe_unordered
                             (GET_CODE (XEXP (tmp, 1)))));
                }
            }
        }
    }
 
  pass = !pass;
 
}
 
/* You may have trouble believing this, but this is the 32 bit HP-PA
   stack layout.  Wow.
 
   Offset               Contents
 
   Variable arguments   (optional; any number may be allocated)
 
   SP-(4*(N+9))         arg word N
        :                   :
      SP-56             arg word 5
      SP-52             arg word 4
 
   Fixed arguments      (must be allocated; may remain unused)
 
      SP-48             arg word 3
      SP-44             arg word 2
      SP-40             arg word 1
      SP-36             arg word 0
 
   Frame Marker
 
      SP-32             External Data Pointer (DP)
      SP-28             External sr4
      SP-24             External/stub RP (RP')
      SP-20             Current RP
      SP-16             Static Link
      SP-12             Clean up
      SP-8              Calling Stub RP (RP'')
      SP-4              Previous SP
 
   Top of Frame
 
      SP-0              Stack Pointer (points to next available address)
 
*/
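
/* Editor's note: the frame-marker slots above restated as illustrative
   byte offsets from SP (hypothetical macro names, an editor's sketch,
   not part of the original pa.c).  */
#define FM_PREV_SP_OFF          (-4)    /* Previous SP */
#define FM_CALLING_STUB_RP_OFF  (-8)    /* Calling Stub RP (RP'') */
#define FM_CLEANUP_OFF          (-12)   /* Clean up */
#define FM_STATIC_LINK_OFF      (-16)   /* Static Link */
#define FM_CURRENT_RP_OFF       (-20)   /* Current RP */
#define FM_EXTERNAL_RP_OFF      (-24)   /* External/stub RP (RP') */
#define FM_EXTERNAL_SR4_OFF     (-28)   /* External sr4 */
#define FM_EXTERNAL_DP_OFF      (-32)   /* External Data Pointer (DP) */
/* Fixed argument word N (0 <= N <= 3) then lives at SP - 36 - 4 * N,
   and variable argument word N at SP - 4 * (N + 9).  */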
 
/* This function saves registers as follows.  Registers marked with ' are
   this function's registers (as opposed to the previous function's).
   If a frame_pointer isn't needed, r4 is saved as a general register;
   the space for the frame pointer is still allocated, though, to keep
   things simple.
 
 
   Top of Frame
 
       SP (FP')         Previous FP
       SP + 4           Alignment filler (sigh)
       SP + 8           Space for locals reserved here.
       .
       .
       .
       SP + n           All call-saved registers used.
       .
       .
       .
       SP + o           All call-saved fp registers used.
       .
       .
       .
       SP + p (SP')     points to next available address.
 
*/
 
/* Global variables set by output_function_prologue().  */
/* Size of frame.  Need to know this to emit return insns from
   leaf procedures.  */
static HOST_WIDE_INT actual_fsize, local_fsize;
static int save_fregs;
 
/* Emit RTL to store REG at the memory location specified by BASE+DISP.
   Handle case where DISP > 8k by using the add_high_const patterns.
 
   Note in DISP > 8k case, we will leave the high part of the address
   in %r1.  There is code in hppa_expand_{prologue,epilogue} that knows
   this.  */
 
static void
store_reg (int reg, HOST_WIDE_INT disp, int base)
{
  rtx insn, dest, src, basereg;
 
  src = gen_rtx_REG (word_mode, reg);
  basereg = gen_rtx_REG (Pmode, base);
  if (VAL_14_BITS_P (disp))
    {
      dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
      insn = emit_move_insn (dest, src);
    }
  else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
    {
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);
 
      emit_move_insn (tmpreg, delta);
      insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
      if (DO_FRAME_NOTES)
        {
          add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                        gen_rtx_SET (VOIDmode, tmpreg,
                                     gen_rtx_PLUS (Pmode, basereg, delta)));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      dest = gen_rtx_MEM (word_mode, tmpreg);
      insn = emit_move_insn (dest, src);
    }
  else
    {
      rtx delta = GEN_INT (disp);
      rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
      rtx tmpreg = gen_rtx_REG (Pmode, 1);
 
      emit_move_insn (tmpreg, high);
      dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
      insn = emit_move_insn (dest, src);
      if (DO_FRAME_NOTES)
        add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                      gen_rtx_SET (VOIDmode,
                                   gen_rtx_MEM (word_mode,
                                                gen_rtx_PLUS (word_mode,
                                                              basereg,
                                                              delta)),
                                   src));
    }
 
  if (DO_FRAME_NOTES)
    RTX_FRAME_RELATED_P (insn) = 1;
}
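
/* Editor's note: an illustrative sketch (not part of the original pa.c)
   of the displacement classification that selects among the three cases
   in store_reg above, assuming the usual PA ranges: VAL_14_BITS_P means
   -8192 <= disp < 8192 and VAL_32_BITS_P means disp fits in 32 signed
   bits.  */
static int
disp_strategy_sketch (long long disp, int target_64bit)
{
  int fits_14 = disp >= -8192 && disp < 8192;
  int fits_32 = disp >= -2147483648LL && disp < 2147483648LL;

  if (fits_14)
    return 1;   /* single reg+disp load/store */
  if (target_64bit && !fits_32)
    return 2;   /* materialize the full offset in %r1 first */
  return 3;     /* HIGH/LO_SUM (addil-style) sequence via %r1 */
}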
 
/* Emit RTL to store REG at the memory location specified by BASE and then
   add MOD to BASE.  MOD must be <= 8k.  */
 
static void
store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
{
  rtx insn, basereg, srcreg, delta;
 
  gcc_assert (VAL_14_BITS_P (mod));
 
  basereg = gen_rtx_REG (Pmode, base);
  srcreg = gen_rtx_REG (word_mode, reg);
  delta = GEN_INT (mod);
 
  insn = emit_insn (gen_post_store (basereg, srcreg, delta));
  if (DO_FRAME_NOTES)
    {
      RTX_FRAME_RELATED_P (insn) = 1;
 
      /* RTX_FRAME_RELATED_P must be set on each frame related set
         in a parallel with more than one element.  */
      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
    }
}
 
/* Emit RTL to set REG to the value specified by BASE+DISP.  Handle case
   where DISP > 8k by using the add_high_const patterns.  NOTE indicates
   whether to add a frame note or not.
 
   In the DISP > 8k case, we leave the high part of the address in %r1.
   There is code in hppa_expand_{prologue,epilogue} that knows about this.  */
 
static void
set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
{
  rtx insn;
 
  if (VAL_14_BITS_P (disp))
    {
      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
                             plus_constant (gen_rtx_REG (Pmode, base), disp));
    }
  else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
    {
      rtx basereg = gen_rtx_REG (Pmode, base);
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);
 
      emit_move_insn (tmpreg, delta);
      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
                             gen_rtx_PLUS (Pmode, tmpreg, basereg));
      if (DO_FRAME_NOTES)
        add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                      gen_rtx_SET (VOIDmode, tmpreg,
                                   gen_rtx_PLUS (Pmode, basereg, delta)));
    }
  else
    {
      rtx basereg = gen_rtx_REG (Pmode, base);
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);
 
      emit_move_insn (tmpreg,
                      gen_rtx_PLUS (Pmode, basereg,
                                    gen_rtx_HIGH (Pmode, delta)));
      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
                             gen_rtx_LO_SUM (Pmode, tmpreg, delta));
    }
 
  if (DO_FRAME_NOTES && note)
    RTX_FRAME_RELATED_P (insn) = 1;
}
 
HOST_WIDE_INT
compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
{
  int freg_saved = 0;
  int i, j;
 
  /* The code in hppa_expand_prologue and hppa_expand_epilogue must
     be consistent with the rounding and size calculation done here.
     Change them at the same time.  */
 
  /* We do our own stack alignment.  First, round the size of the
     stack locals up to a word boundary.  */
  size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
 
  /* Space for previous frame pointer + filler.  If any frame is
     allocated, we need to add in the STARTING_FRAME_OFFSET.  We
     waste some space here for the sake of HP compatibility.  The
     first slot is only used when the frame pointer is needed.  */
  if (size || frame_pointer_needed)
    size += STARTING_FRAME_OFFSET;
 
  /* If the current function calls __builtin_eh_return, then we need
     to allocate stack space for registers that will hold data for
     the exception handler.  */
  if (DO_FRAME_NOTES && crtl->calls_eh_return)
    {
      unsigned int i;
 
      for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
        continue;
      size += i * UNITS_PER_WORD;
    }
 
  /* Account for space used by the callee general register saves.  */
  for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
    if (df_regs_ever_live_p (i))
      size += UNITS_PER_WORD;
 
  /* Account for space used by the callee floating point register saves.  */
  for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
    if (df_regs_ever_live_p (i)
        || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
      {
        freg_saved = 1;
 
        /* We always save both halves of the FP register, so always
           increment the frame size by 8 bytes.  */
        size += 8;
      }
 
  /* If any of the floating registers are saved, account for the
     alignment needed for the floating point register save block.  */
  if (freg_saved)
    {
      size = (size + 7) & ~7;
      if (fregs_live)
        *fregs_live = 1;
    }
 
  /* The various ABIs include space for the outgoing parameters in the
     size of the current function's stack frame.  We don't need to align
     for the outgoing arguments as their alignment is set by the final
     rounding for the frame as a whole.  */
  size += crtl->outgoing_args_size;
 
  /* Allocate space for the fixed frame marker.  This space must be
     allocated for any function that makes calls or allocates
     stack space.  */
  if (!current_function_is_leaf || size)
    size += TARGET_64BIT ? 48 : 32;
 
  /* Finally, round to the preferred stack boundary.  */
  return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
          & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
}
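
/* Editor's note: a worked example of the rounding above (an editor's
   sketch with assumed parameters, not part of the original pa.c).  On a
   32-bit target, assume 18 bytes of locals, one callee GR save, one FP
   register pair saved, 16 bytes of outgoing arguments,
   STARTING_FRAME_OFFSET == 8 and a 64-byte preferred stack boundary:

       18 rounded up to a word          -> 20
       + STARTING_FRAME_OFFSET          -> 28
       + 1 GR save (4 bytes)            -> 32
       + 1 FP pair (8 bytes, 8-aligned) -> 40
       + outgoing arguments             -> 56
       + 32-byte fixed frame marker     -> 88
       rounded up to 64 bytes           -> 128  */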
 
/* Generate the assembly code for function entry.  FILE is a stdio
   stream to output the code to.  SIZE is an int: how many units of
   temporary storage to allocate.
 
   Refer to the array `regs_ever_live' to determine which registers to
   save; `regs_ever_live[I]' is nonzero if register number I is ever
   used in the function.  This function is responsible for knowing
   which registers should not be saved even if used.  */
 
/* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
   of memory.  If any fpu reg is used in the function, we allocate
   such a block here, at the bottom of the frame, just in case it's needed.
 
   If this function is a leaf procedure, then we may choose not
   to do a "save" insn.  The decision about whether or not
   to do this is made in regclass.c.  */
 
static void
pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  /* The function's label and associated .PROC must never be
     separated and must be output *after* any profiling declarations
     to avoid changing spaces/subspaces within a procedure.  */
  ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
  fputs ("\t.PROC\n", file);
 
  /* hppa_expand_prologue does the dirty work now.  We just need
     to output the assembler directives which denote the start
     of a function.  */
  fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
  if (current_function_is_leaf)
    fputs (",NO_CALLS", file);
  else
    fputs (",CALLS", file);
  if (rp_saved)
    fputs (",SAVE_RP", file);
 
  /* The SAVE_SP flag is used to indicate that register %r3 is stored
     at the beginning of the frame and that it is used as the frame
     pointer for the frame.  We do this because our current frame
     layout doesn't conform to that specified in the HP runtime
     documentation and we need a way to indicate to programs such as
     GDB where %r3 is saved.  The SAVE_SP flag was chosen because it
     isn't used by HP compilers but is supported by the assembler.
     However, SAVE_SP is supposed to indicate that the previous stack
     pointer has been saved in the frame marker.  */
  if (frame_pointer_needed)
    fputs (",SAVE_SP", file);
 
  /* Pass on information about the number of callee register saves
     performed in the prologue.
 
     The compiler is supposed to pass the highest register number
     saved, the assembler then has to adjust that number before
     entering it into the unwind descriptor (to account for any
     caller saved registers with lower register numbers than the
     first callee saved register).  */
  if (gr_saved)
    fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
 
  if (fr_saved)
    fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
 
  fputs ("\n\t.ENTRY\n", file);
 
  remove_useless_addtr_insns (0);
}
 
void
hppa_expand_prologue (void)
{
  int merge_sp_adjust_with_store = 0;
  HOST_WIDE_INT size = get_frame_size ();
  HOST_WIDE_INT offset;
  int i;
  rtx insn, tmpreg;
 
  gr_saved = 0;
  fr_saved = 0;
  save_fregs = 0;
 
  /* Compute total size for frame pointer, filler, locals and rounding to
     the next word boundary.  Similar code appears in compute_frame_size
     and must be changed in tandem with this code.  */
  local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
  if (local_fsize || frame_pointer_needed)
    local_fsize += STARTING_FRAME_OFFSET;
 
  actual_fsize = compute_frame_size (size, &save_fregs);
 
  /* Compute a few things we will use often.  */
  tmpreg = gen_rtx_REG (word_mode, 1);
 
  /* Save RP first.  The calling conventions manual states RP will
     always be stored into the caller's frame at sp - 20 or sp - 16
     depending on which ABI is in use.  */
  if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
    {
      store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
      rp_saved = true;
    }
  else
    rp_saved = false;
 
  /* Allocate the local frame and set up the frame pointer if needed.  */
  if (actual_fsize != 0)
    {
      if (frame_pointer_needed)
        {
          /* Copy the old frame pointer temporarily into %r1.  Set up the
             new stack pointer, then store away the saved old frame pointer
             into the stack at sp and at the same time update the stack
             pointer by actual_fsize bytes.  Two versions: the first
             handles small (<8k) frames, the second large (>=8k)
             frames.  */
          insn = emit_move_insn (tmpreg, frame_pointer_rtx);
          if (DO_FRAME_NOTES)
            RTX_FRAME_RELATED_P (insn) = 1;
 
          insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
          if (DO_FRAME_NOTES)
            RTX_FRAME_RELATED_P (insn) = 1;
 
          if (VAL_14_BITS_P (actual_fsize))
            store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
          else
            {
              /* It is incorrect to store the saved frame pointer at *sp,
                 then increment sp (writes beyond the current stack boundary).
 
                 So instead use stwm to store at *sp and post-increment the
                 stack pointer as an atomic operation.  Then increment sp to
                 finish allocating the new frame.  */
              HOST_WIDE_INT adjust1 = 8192 - 64;
              HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
 
              store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
              set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
                              adjust2, 1);
            }
 
          /* We set SAVE_SP in frames that need a frame pointer.  Thus,
             we need to store the previous stack pointer (frame pointer)
             into the frame marker on targets that use the HP unwind
             library.  This allows the HP unwind library to be used to
             unwind GCC frames.  However, we are not fully compatible
             with the HP library because our frame layout differs from
             that specified in the HP runtime specification.
 
             We don't want a frame note on this instruction as the frame
             marker moves during dynamic stack allocation.
 
             This instruction also serves as a blockage to prevent
             register spills from being scheduled before the stack
             pointer is raised.  This is necessary as we store
             registers using the frame pointer as a base register,
             and the frame pointer is set before sp is raised.  */
          if (TARGET_HPUX_UNWIND_LIBRARY)
            {
              rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
                                       GEN_INT (TARGET_64BIT ? -8 : -4));
 
              emit_move_insn (gen_rtx_MEM (word_mode, addr),
                              frame_pointer_rtx);
            }
          else
            emit_insn (gen_blockage ());
        }
      /* No frame pointer needed.  */
      else
        {
          /* In some cases we can perform the first callee register save
             and allocate the stack frame at the same time.  If so, just
             make a note of it and defer allocating the frame until saving
             the callee registers.  */
          if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
            merge_sp_adjust_with_store = 1;
          /* Cannot optimize.  Adjust the stack frame by actual_fsize
             bytes.  */
          else
            set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
                            actual_fsize, 1);
        }
    }
 
  /* Normal register save.
 
     Do not save the frame pointer in the frame_pointer_needed case.  It
     was done earlier.  */
  if (frame_pointer_needed)
    {
      offset = local_fsize;
 
      /* Saving the EH return data registers in the frame is the simplest
         way to get the frame unwind information emitted.  We put them
         just before the general registers.  */
      if (DO_FRAME_NOTES && crtl->calls_eh_return)
        {
          unsigned int i, regno;
 
          for (i = 0; ; ++i)
            {
              regno = EH_RETURN_DATA_REGNO (i);
              if (regno == INVALID_REGNUM)
                break;
 
              store_reg (regno, offset, FRAME_POINTER_REGNUM);
              offset += UNITS_PER_WORD;
            }
        }
 
      for (i = 18; i >= 4; i--)
        if (df_regs_ever_live_p (i) && ! call_used_regs[i])
          {
            store_reg (i, offset, FRAME_POINTER_REGNUM);
            offset += UNITS_PER_WORD;
            gr_saved++;
          }
      /* Account for %r3 which is saved in a special place.  */
      gr_saved++;
    }
  /* No frame pointer needed.  */
  else
    {
      offset = local_fsize - actual_fsize;
 
      /* Saving the EH return data registers in the frame is the simplest
         way to get the frame unwind information emitted.  */
      if (DO_FRAME_NOTES && crtl->calls_eh_return)
        {
          unsigned int i, regno;
 
          for (i = 0; ; ++i)
            {
              regno = EH_RETURN_DATA_REGNO (i);
              if (regno == INVALID_REGNUM)
                break;
 
              /* If merge_sp_adjust_with_store is nonzero, then we can
                 optimize the first save.  */
              if (merge_sp_adjust_with_store)
                {
                  store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
                  merge_sp_adjust_with_store = 0;
                }
              else
                store_reg (regno, offset, STACK_POINTER_REGNUM);
              offset += UNITS_PER_WORD;
            }
        }
 
      for (i = 18; i >= 3; i--)
        if (df_regs_ever_live_p (i) && ! call_used_regs[i])
          {
            /* If merge_sp_adjust_with_store is nonzero, then we can
               optimize the first GR save.  */
            if (merge_sp_adjust_with_store)
              {
                store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
                merge_sp_adjust_with_store = 0;
              }
            else
              store_reg (i, offset, STACK_POINTER_REGNUM);
            offset += UNITS_PER_WORD;
            gr_saved++;
          }
 
      /* If we wanted to merge the SP adjustment with a GR save, but we never
         did any GR saves, then just emit the adjustment here.  */
      if (merge_sp_adjust_with_store)
        set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
                        actual_fsize, 1);
    }
 
  /* The hppa calling conventions say that %r19, the pic offset
     register, is saved at sp - 32 (in this function's frame)
     when generating PIC code.  FIXME:  What is the correct thing
     to do for functions which make no calls and allocate no
     frame?  Do we need to allocate a frame, or can we just omit
     the save?   For now we'll just omit the save.
 
     We don't want a note on this insn as the frame marker can
     move if there is a dynamic stack allocation.  */
  if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
    {
      rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
 
      emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
 
    }
 
  /* Align pointer properly (doubleword boundary).  */
  offset = (offset + 7) & ~7;
 
  /* Floating point register store.  */
  if (save_fregs)
    {
      rtx base;
 
      /* First get the frame or stack pointer to the start of the FP register
         save area.  */
      if (frame_pointer_needed)
        {
          set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
          base = frame_pointer_rtx;
        }
      else
        {
          set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
          base = stack_pointer_rtx;
        }
 
      /* Now actually save the FP registers.  */
      for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
        {
          if (df_regs_ever_live_p (i)
              || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
            {
              rtx addr, insn, reg;
              addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
              reg = gen_rtx_REG (DFmode, i);
              insn = emit_move_insn (addr, reg);
              if (DO_FRAME_NOTES)
                {
                  RTX_FRAME_RELATED_P (insn) = 1;
                  if (TARGET_64BIT)
                    {
                      rtx mem = gen_rtx_MEM (DFmode,
                                             plus_constant (base, offset));
                      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                                    gen_rtx_SET (VOIDmode, mem, reg));
                    }
                  else
                    {
                      rtx meml = gen_rtx_MEM (SFmode,
                                              plus_constant (base, offset));
                      rtx memr = gen_rtx_MEM (SFmode,
                                              plus_constant (base, offset + 4));
                      rtx regl = gen_rtx_REG (SFmode, i);
                      rtx regr = gen_rtx_REG (SFmode, i + 1);
                      rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
                      rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
                      rtvec vec;
 
                      RTX_FRAME_RELATED_P (setl) = 1;
                      RTX_FRAME_RELATED_P (setr) = 1;
                      vec = gen_rtvec (2, setl, setr);
                      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                                    gen_rtx_SEQUENCE (VOIDmode, vec));
                    }
                }
              offset += GET_MODE_SIZE (DFmode);
              fr_saved++;
            }
        }
    }
}
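
/* Editor's note: an illustrative helper (not part of the original pa.c)
   showing the large-frame split used in hppa_expand_prologue when
   actual_fsize does not fit in 14 bits: the first 8192 - 64 bytes are
   allocated atomically together with the store of the saved frame
   pointer, and the remainder is a plain stack pointer adjustment.  */
static void
split_large_frame_sketch (long actual_fsize, long *adjust1, long *adjust2)
{
  *adjust1 = 8192 - 64;                /* fits a 14-bit displacement */
  *adjust2 = actual_fsize - *adjust1;  /* rest of the new frame */
}
/* For actual_fsize == 20000: *adjust1 == 8128, *adjust2 == 11872.  */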
 
/* Emit RTL to load REG from the memory location specified by BASE+DISP.
   Handle case where DISP > 8k by using the add_high_const patterns.  */
 
static void
load_reg (int reg, HOST_WIDE_INT disp, int base)
{
  rtx dest = gen_rtx_REG (word_mode, reg);
  rtx basereg = gen_rtx_REG (Pmode, base);
  rtx src;
 
  if (VAL_14_BITS_P (disp))
    src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
  else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
    {
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);
 
      emit_move_insn (tmpreg, delta);
      if (TARGET_DISABLE_INDEXING)
        {
          emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
          src = gen_rtx_MEM (word_mode, tmpreg);
        }
      else
        src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
    }
  else
    {
      rtx delta = GEN_INT (disp);
      rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
      rtx tmpreg = gen_rtx_REG (Pmode, 1);
 
      emit_move_insn (tmpreg, high);
      src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
    }
 
  emit_move_insn (dest, src);
}
 
/* Update the total code bytes output to the text section.  */
 
static void
update_total_code_bytes (unsigned int nbytes)
{
  if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
      && !IN_NAMED_SECTION_P (cfun->decl))
    {
      unsigned int old_total = total_code_bytes;
 
      total_code_bytes += nbytes;
 
      /* Be prepared to handle overflows.  */
      if (old_total > total_code_bytes)
        total_code_bytes = UINT_MAX;
    }
}
 
/* This function generates the assembly code for function exit.
   Args are as for output_function_prologue ().
 
   The function epilogue should not depend on the current stack
   pointer!  It should use the frame pointer only.  This is mandatory
   because of alloca; we also take advantage of it to omit stack
   adjustments before returning.  */
 
static void
pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  rtx insn = get_last_insn ();
 
  last_address = 0;
 
  /* hppa_expand_epilogue does the dirty work now.  We just need
     to output the assembler directives which denote the end
     of a function.
 
     To make debuggers happy, emit a nop if the epilogue was completely
     eliminated due to a volatile call as the last insn in the
     current function.  That way the return address (in %r2) will
     always point to a valid instruction in the current function.  */
 
  /* Get the last real insn.  */
  if (GET_CODE (insn) == NOTE)
    insn = prev_real_insn (insn);
 
  /* If it is a sequence, then look inside.  */
  if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
    insn = XVECEXP (PATTERN (insn), 0, 0);
 
  /* If insn is a CALL_INSN, then it must be a call to a volatile
     function (otherwise there would be epilogue insns).  */
  if (insn && GET_CODE (insn) == CALL_INSN)
    {
      fputs ("\tnop\n", file);
      last_address += 4;
    }
 
  fputs ("\t.EXIT\n\t.PROCEND\n", file);
 
  if (TARGET_SOM && TARGET_GAS)
    {
      /* We're done with this subspace except possibly for some additional
         debug information.  Forget that we are in this subspace to ensure
         that the next function is output in its own subspace.  */
      in_section = NULL;
      cfun->machine->in_nsubspa = 2;
    }
 
  if (INSN_ADDRESSES_SET_P ())
    {
      insn = get_last_nonnote_insn ();
      last_address += INSN_ADDRESSES (INSN_UID (insn));
      if (INSN_P (insn))
        last_address += insn_default_length (insn);
      last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
                      & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
    }
  else
    last_address = UINT_MAX;
 
  /* Finally, update the total number of code bytes output so far.  */
  update_total_code_bytes (last_address);
}
 
void
hppa_expand_epilogue (void)
{
  rtx tmpreg;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT ret_off = 0;
  int i;
  int merge_sp_adjust_with_load = 0;
 
  /* We will use this often.  */
  tmpreg = gen_rtx_REG (word_mode, 1);
 
  /* Try to restore RP early to avoid load/use interlocks when
     RP gets used in the return (bv) instruction.  This appears to still
     be necessary even when we schedule the prologue and epilogue.  */
  if (rp_saved)
    {
      ret_off = TARGET_64BIT ? -16 : -20;
      if (frame_pointer_needed)
        {
          load_reg (2, ret_off, FRAME_POINTER_REGNUM);
          ret_off = 0;
        }
      else
        {
          /* No frame pointer, and stack is smaller than 8k.  */
          if (VAL_14_BITS_P (ret_off - actual_fsize))
            {
              load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
              ret_off = 0;
            }
        }
    }
 
  /* General register restores.  */
  if (frame_pointer_needed)
    {
      offset = local_fsize;
 
      /* If the current function calls __builtin_eh_return, then we need
         to restore the saved EH data registers.  */
      if (DO_FRAME_NOTES && crtl->calls_eh_return)
        {
          unsigned int i, regno;
 
          for (i = 0; ; ++i)
            {
              regno = EH_RETURN_DATA_REGNO (i);
              if (regno == INVALID_REGNUM)
                break;
 
              load_reg (regno, offset, FRAME_POINTER_REGNUM);
              offset += UNITS_PER_WORD;
            }
        }
 
      for (i = 18; i >= 4; i--)
        if (df_regs_ever_live_p (i) && ! call_used_regs[i])
          {
            load_reg (i, offset, FRAME_POINTER_REGNUM);
            offset += UNITS_PER_WORD;
          }
    }
  else
    {
      offset = local_fsize - actual_fsize;
 
      /* If the current function calls __builtin_eh_return, then we need
         to restore the saved EH data registers.  */
      if (DO_FRAME_NOTES && crtl->calls_eh_return)
        {
          unsigned int i, regno;
 
          for (i = 0; ; ++i)
            {
              regno = EH_RETURN_DATA_REGNO (i);
              if (regno == INVALID_REGNUM)
                break;
 
              /* Only for the first load.
                 merge_sp_adjust_with_load holds the register load
                 with which we will merge the sp adjustment.  */
              if (merge_sp_adjust_with_load == 0
                  && local_fsize == 0
                  && VAL_14_BITS_P (-actual_fsize))
                merge_sp_adjust_with_load = regno;
              else
                load_reg (regno, offset, STACK_POINTER_REGNUM);
              offset += UNITS_PER_WORD;
            }
        }
 
      for (i = 18; i >= 3; i--)
        {
          if (df_regs_ever_live_p (i) && ! call_used_regs[i])
            {
              /* Only for the first load.
                 merge_sp_adjust_with_load holds the register load
                 with which we will merge the sp adjustment.  */
              if (merge_sp_adjust_with_load == 0
                  && local_fsize == 0
                  && VAL_14_BITS_P (-actual_fsize))
                merge_sp_adjust_with_load = i;
              else
                load_reg (i, offset, STACK_POINTER_REGNUM);
              offset += UNITS_PER_WORD;
            }
        }
    }
 
  /* Align pointer properly (doubleword boundary).  */
  offset = (offset + 7) & ~7;
 
  /* FP register restores.  */
  if (save_fregs)
    {
      /* Adjust the register to index off of.  */
      if (frame_pointer_needed)
        set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
      else
        set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
 
      /* Actually do the restores now.  */
      for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
        if (df_regs_ever_live_p (i)
            || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
          {
            rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
            rtx dest = gen_rtx_REG (DFmode, i);
            emit_move_insn (dest, src);
          }
    }
 
  /* Emit a blockage insn here to keep these insns from being moved to
     an earlier spot in the epilogue, or into the main instruction stream.
 
     This is necessary as we must not cut the stack back before all the
     restores are finished.  */
  emit_insn (gen_blockage ());
 
  /* Reset stack pointer (and possibly frame pointer).  The stack
     pointer is initially set to fp + 64 to avoid a race condition.  */
  if (frame_pointer_needed)
    {
      rtx delta = GEN_INT (-64);
 
      set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
      emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
    }
  /* If we were deferring a callee register restore, do it now.  */
  else if (merge_sp_adjust_with_load)
    {
      rtx delta = GEN_INT (-actual_fsize);
      rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
 
      emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
    }
  else if (actual_fsize != 0)
    set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
                    - actual_fsize, 0);
 
  /* If we haven't restored %r2 yet (no frame pointer, and a stack
     frame greater than 8k), do so now.  */
  if (ret_off != 0)
    load_reg (2, ret_off, STACK_POINTER_REGNUM);
 
  if (DO_FRAME_NOTES && crtl->calls_eh_return)
    {
      rtx sa = EH_RETURN_STACKADJ_RTX;
 
      emit_insn (gen_blockage ());
      emit_insn (TARGET_64BIT
                 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
                 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
    }
}
 
rtx
hppa_pic_save_rtx (void)
{
  return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
}
 
#ifndef NO_DEFERRED_PROFILE_COUNTERS
#define NO_DEFERRED_PROFILE_COUNTERS 0
#endif
 
 
/* Vector of funcdef numbers.  */
static VEC(int,heap) *funcdef_nos;
 
/* Output deferred profile counters.  */
static void
output_deferred_profile_counters (void)
{
  unsigned int i;
  int align, n;
 
  if (VEC_empty (int, funcdef_nos))
   return;
 
  switch_to_section (data_section);
  align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
  ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
 
  for (i = 0; VEC_iterate (int, funcdef_nos, i, n); i++)
    {
      targetm.asm_out.internal_label (asm_out_file, "LP", n);
      assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
    }
 
  VEC_free (int, heap, funcdef_nos);
}
 
void
hppa_profile_hook (int label_no)
{
  /* We use SImode for the address of the function in both 32 and
     64-bit code to avoid having to provide DImode versions of the
     lcla2 and load_offset_label_address insn patterns.  */
  rtx reg = gen_reg_rtx (SImode);
  rtx label_rtx = gen_label_rtx ();
  rtx begin_label_rtx, call_insn;
  char begin_label_name[16];
 
  ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
                               label_no);
  begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
 
  if (TARGET_64BIT)
    emit_move_insn (arg_pointer_rtx,
                    gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
                                  GEN_INT (64)));
 
  emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
 
  /* The address of the function is loaded into %r25 with an instruction-
     relative sequence that avoids the use of relocations.  The sequence
     is split so that the load_offset_label_address instruction can
     occupy the delay slot of the call to _mcount.  */
  if (TARGET_PA_20)
    emit_insn (gen_lcla2 (reg, label_rtx));
  else
    emit_insn (gen_lcla1 (reg, label_rtx));
 
  emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
                                            reg, begin_label_rtx, label_rtx));
 
#if !NO_DEFERRED_PROFILE_COUNTERS
  {
    rtx count_label_rtx, addr, r24;
    char count_label_name[16];
 
    VEC_safe_push (int, heap, funcdef_nos, label_no);
    ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
    count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
 
    addr = force_reg (Pmode, count_label_rtx);
    r24 = gen_rtx_REG (Pmode, 24);
    emit_move_insn (r24, addr);
 
    call_insn =
      emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
                                             gen_rtx_SYMBOL_REF (Pmode,
                                                                 "_mcount")),
                                GEN_INT (TARGET_64BIT ? 24 : 12)));
 
    use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
  }
#else
 
  call_insn =
    emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
                                           gen_rtx_SYMBOL_REF (Pmode,
                                                               "_mcount")),
                              GEN_INT (TARGET_64BIT ? 16 : 8)));
 
#endif
 
  use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
  use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
 
  /* Indicate the _mcount call cannot throw, nor will it execute a
     non-local goto.  */
  make_reg_eh_region_note_nothrow_nononlocal (call_insn);
}
4376
 
4377
/* Fetch the return address for the frame COUNT steps up from
4378
   the current frame, after the prologue.  FRAMEADDR is the
4379
   frame pointer of the COUNT frame.
4380
 
4381
   We want to ignore any export stub remnants here.  To handle this,
4382
   we examine the code at the return address, and if it is an export
4383
   stub, we return a memory rtx for the stub return address stored
4384
   at frame-24.
4385
 
4386
   The value returned is used in two different ways:
4387
 
4388
        1. To find a function's caller.
4389
 
4390
        2. To change the return address for a function.
4391
 
4392
   This function handles most instances of case 1; however, it will
4393
   fail if there are two levels of stubs to execute on the return
4394
   path.  The only way I believe that can happen is if the return value
4395
   needs a parameter relocation, which never happens for C code.
4396
 
4397
   This function handles most instances of case 2; however, it will
4398
   fail if we did not originally have stub code on the return path
4399
   but will need stub code on the new return path.  This can happen if
4400
   the caller & callee are both in the main program, but the new
4401
   return location is in a shared library.  */
4402
 
4403
rtx
4404
return_addr_rtx (int count, rtx frameaddr)
4405
{
4406
  rtx label;
4407
  rtx rp;
4408
  rtx saved_rp;
4409
  rtx ins;
4410
 
4411
  /* Instruction stream at the normal return address for the export stub:
4412
 
4413
        0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
4414
        0x004010a1 | stub+12:  ldsid (sr0,rp),r1
4415
        0x00011820 | stub+16:  mtsp r1,sr0
4416
        0xe0400002 | stub+20:  be,n 0(sr0,rp)
4417
 
4418
     0xe0400002 must be specified as -532676606 so that it won't be
4419
     rejected as an invalid immediate operand on 64-bit hosts.  */
4420
 
4421
  HOST_WIDE_INT insns[4] = {0x4bc23fd1, 0x004010a1, 0x00011820, -532676606};
4422
  int i;
4423
 
4424
  if (count != 0)
4425
    return NULL_RTX;
4426
 
4427
  rp = get_hard_reg_initial_val (Pmode, 2);
4428
 
4429
  if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4430
    return rp;
4431
 
4432
  /* If there is no export stub then just use the value saved from
4433
     the return pointer register.  */
4434
 
4435
  saved_rp = gen_reg_rtx (Pmode);
4436
  emit_move_insn (saved_rp, rp);
4437
 
4438
  /* Get pointer to the instruction stream.  We have to mask out the
4439
     privilege level from the two low order bits of the return address
4440
     pointer here so that ins will point to the start of the first
4441
     instruction that would have been executed if we returned.  */
4442
  ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4443
  label = gen_label_rtx ();
4444
 
4445
  /* Check the instruction stream at the normal return address for the
4446
     export stub.  If it is an export stub, than our return address is
4447
     really in -24[frameaddr].  */
4448
 
4449
  for (i = 0; i < 3; i++)
4450
    {
4451
      rtx op0 = gen_rtx_MEM (SImode, plus_constant (ins, i * 4));
4452
      rtx op1 = GEN_INT (insns[i]);
4453
      emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4454
    }
4455
 
4456
  /* Here we know that our return address points to an export
4457
     stub.  We don't want to return the address of the export stub,
4458
     but rather the return address of the export stub.  That return
4459
     address is stored at -24[frameaddr].  */
4460
 
4461
  emit_move_insn (saved_rp,
4462
                  gen_rtx_MEM (Pmode,
4463
                               memory_address (Pmode,
4464
                                               plus_constant (frameaddr,
4465
                                                              -24))));
4466
 
4467
  emit_label (label);
4468
 
4469
  return saved_rp;
4470
}
4471
 
4472
void
4473
emit_bcond_fp (rtx operands[])
4474
{
4475
  enum rtx_code code = GET_CODE (operands[0]);
4476
  rtx operand0 = operands[1];
4477
  rtx operand1 = operands[2];
4478
  rtx label = operands[3];
4479
 
4480
  emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4481
                          gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4482
 
4483
  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4484
                               gen_rtx_IF_THEN_ELSE (VOIDmode,
4485
                                                     gen_rtx_fmt_ee (NE,
4486
                                                              VOIDmode,
4487
                                                              gen_rtx_REG (CCFPmode, 0),
4488
                                                              const0_rtx),
4489
                                                     gen_rtx_LABEL_REF (VOIDmode, label),
4490
                                                     pc_rtx)));
4491
 
4492
}
4493
 
4494
/* Adjust the cost of a scheduling dependency.  Return the new cost of
4495
   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
4496
 
4497
static int
4498
pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
4499
{
4500
  enum attr_type attr_type;
4501
 
4502
  /* Don't adjust costs for a pa8000 chip, also do not adjust any
4503
     true dependencies as they are described with bypasses now.  */
4504
  if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
4505
    return cost;
4506
 
4507
  if (! recog_memoized (insn))
4508
    return 0;
4509
 
4510
  attr_type = get_attr_type (insn);
4511
 
4512
  switch (REG_NOTE_KIND (link))
4513
    {
4514
    case REG_DEP_ANTI:
4515
      /* Anti dependency; DEP_INSN reads a register that INSN writes some
4516
         cycles later.  */
4517
 
4518
      if (attr_type == TYPE_FPLOAD)
4519
        {
4520
          rtx pat = PATTERN (insn);
4521
          rtx dep_pat = PATTERN (dep_insn);
4522
          if (GET_CODE (pat) == PARALLEL)
4523
            {
4524
              /* This happens for the fldXs,mb patterns.  */
4525
              pat = XVECEXP (pat, 0, 0);
4526
            }
4527
          if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4528
            /* If this happens, we have to extend this to schedule
4529
               optimally.  Return 0 for now.  */
4530
          return 0;
4531
 
4532
          if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4533
            {
4534
              if (! recog_memoized (dep_insn))
4535
                return 0;
4536
              switch (get_attr_type (dep_insn))
4537
                {
4538
                case TYPE_FPALU:
4539
                case TYPE_FPMULSGL:
4540
                case TYPE_FPMULDBL:
4541
                case TYPE_FPDIVSGL:
4542
                case TYPE_FPDIVDBL:
4543
                case TYPE_FPSQRTSGL:
4544
                case TYPE_FPSQRTDBL:
4545
                  /* A fpload can't be issued until one cycle before a
4546
                     preceding arithmetic operation has finished if
4547
                     the target of the fpload is any of the sources
4548
                     (or destination) of the arithmetic operation.  */
4549
                  return insn_default_latency (dep_insn) - 1;
4550
 
4551
                default:
4552
                  return 0;
4553
                }
4554
            }
4555
        }
4556
      else if (attr_type == TYPE_FPALU)
4557
        {
4558
          rtx pat = PATTERN (insn);
4559
          rtx dep_pat = PATTERN (dep_insn);
4560
          if (GET_CODE (pat) == PARALLEL)
4561
            {
4562
              /* This happens for the fldXs,mb patterns.  */
4563
              pat = XVECEXP (pat, 0, 0);
4564
            }
4565
          if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4566
            /* If this happens, we have to extend this to schedule
4567
               optimally.  Return 0 for now.  */
4568
          return 0;
4569
 
4570
          if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4571
            {
4572
              if (! recog_memoized (dep_insn))
4573
                return 0;
4574
              switch (get_attr_type (dep_insn))
4575
                {
4576
                case TYPE_FPDIVSGL:
4577
                case TYPE_FPDIVDBL:
4578
                case TYPE_FPSQRTSGL:
4579
                case TYPE_FPSQRTDBL:
4580
                  /* An ALU flop can't be issued until two cycles before a
4581
                     preceding divide or sqrt operation has finished if
4582
                     the target of the ALU flop is any of the sources
4583
                     (or destination) of the divide or sqrt operation.  */
4584
                  return insn_default_latency (dep_insn) - 2;
4585
 
4586
                default:
4587
                  return 0;
4588
                }
4589
            }
4590
        }
4591
 
4592
      /* For other anti dependencies, the cost is 0.  */
4593
      return 0;
4594
 
4595
    case REG_DEP_OUTPUT:
4596
      /* Output dependency; DEP_INSN writes a register that INSN writes some
4597
         cycles later.  */
4598
      if (attr_type == TYPE_FPLOAD)
4599
        {
4600
          rtx pat = PATTERN (insn);
4601
          rtx dep_pat = PATTERN (dep_insn);
4602
          if (GET_CODE (pat) == PARALLEL)
4603
            {
4604
              /* This happens for the fldXs,mb patterns.  */
4605
              pat = XVECEXP (pat, 0, 0);
4606
            }
4607
          if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4608
            /* If this happens, we have to extend this to schedule
4609
               optimally.  Return 0 for now.  */
4610
          return 0;
4611
 
4612
          if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4613
            {
4614
              if (! recog_memoized (dep_insn))
4615
                return 0;
4616
              switch (get_attr_type (dep_insn))
4617
                {
4618
                case TYPE_FPALU:
4619
                case TYPE_FPMULSGL:
4620
                case TYPE_FPMULDBL:
4621
                case TYPE_FPDIVSGL:
4622
                case TYPE_FPDIVDBL:
4623
                case TYPE_FPSQRTSGL:
4624
                case TYPE_FPSQRTDBL:
4625
                  /* A fpload can't be issued until one cycle before a
4626
                     preceding arithmetic operation has finished if
4627
                     the target of the fpload is the destination of the
4628
                     arithmetic operation.
4629
 
4630
                     Exception: For PA7100LC, PA7200 and PA7300, the cost
4631
                     is 3 cycles, unless they bundle together.   We also
4632
                     pay the penalty if the second insn is a fpload.  */
4633
                  return insn_default_latency (dep_insn) - 1;
4634
 
4635
                default:
4636
                  return 0;
4637
                }
4638
            }
4639
        }
4640
      else if (attr_type == TYPE_FPALU)
4641
        {
4642
          rtx pat = PATTERN (insn);
4643
          rtx dep_pat = PATTERN (dep_insn);
4644
          if (GET_CODE (pat) == PARALLEL)
4645
            {
4646
              /* This happens for the fldXs,mb patterns.  */
4647
              pat = XVECEXP (pat, 0, 0);
4648
            }
4649
          if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4650
            /* If this happens, we have to extend this to schedule
4651
               optimally.  Return 0 for now.  */
4652
          return 0;
4653
 
4654
          if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4655
            {
4656
              if (! recog_memoized (dep_insn))
4657
                return 0;
4658
              switch (get_attr_type (dep_insn))
4659
                {
4660
                case TYPE_FPDIVSGL:
4661
                case TYPE_FPDIVDBL:
4662
                case TYPE_FPSQRTSGL:
4663
                case TYPE_FPSQRTDBL:
4664
                  /* An ALU flop can't be issued until two cycles before a
4665
                     preceding divide or sqrt operation has finished if
4666
                     the target of the ALU flop is also the target of
4667
                     the divide or sqrt operation.  */
4668
                  return insn_default_latency (dep_insn) - 2;
4669
 
4670
                default:
4671
                  return 0;
4672
                }
4673
            }
4674
        }
4675
 
4676
      /* For other output dependencies, the cost is 0.  */
4677
      return 0;
4678
 
4679
    default:
4680
      gcc_unreachable ();
4681
    }
4682
}
4683
 
4684
/* Adjust scheduling priorities.  We use this to try and keep addil
4685
   and the next use of %r1 close together.  */
4686
static int
4687
pa_adjust_priority (rtx insn, int priority)
4688
{
4689
  rtx set = single_set (insn);
4690
  rtx src, dest;
4691
  if (set)
4692
    {
4693
      src = SET_SRC (set);
4694
      dest = SET_DEST (set);
4695
      if (GET_CODE (src) == LO_SUM
4696
          && symbolic_operand (XEXP (src, 1), VOIDmode)
4697
          && ! read_only_operand (XEXP (src, 1), VOIDmode))
4698
        priority >>= 3;
4699
 
4700
      else if (GET_CODE (src) == MEM
4701
               && GET_CODE (XEXP (src, 0)) == LO_SUM
4702
               && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4703
               && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4704
        priority >>= 1;
4705
 
4706
      else if (GET_CODE (dest) == MEM
4707
               && GET_CODE (XEXP (dest, 0)) == LO_SUM
4708
               && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4709
               && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4710
        priority >>= 3;
4711
    }
4712
  return priority;
4713
}
4714
 
4715
/* The 700 can only issue a single insn at a time.
4716
   The 7XXX processors can issue two insns at a time.
4717
   The 8000 can issue 4 insns at a time.  */
4718
static int
4719
pa_issue_rate (void)
4720
{
4721
  switch (pa_cpu)
4722
    {
4723
    case PROCESSOR_700:         return 1;
4724
    case PROCESSOR_7100:        return 2;
4725
    case PROCESSOR_7100LC:      return 2;
4726
    case PROCESSOR_7200:        return 2;
4727
    case PROCESSOR_7300:        return 2;
4728
    case PROCESSOR_8000:        return 4;
4729
 
4730
    default:
4731
      gcc_unreachable ();
4732
    }
4733
}
4734
 
4735
 
4736
 
4737
/* Return any length adjustment needed by INSN which already has its length
4738
   computed as LENGTH.   Return zero if no adjustment is necessary.
4739
 
4740
   For the PA: function calls, millicode calls, and backwards short
4741
   conditional branches with unfilled delay slots need an adjustment by +1
4742
   (to account for the NOP which will be inserted into the instruction stream).
4743
 
4744
   Also compute the length of an inline block move here as it is too
4745
   complicated to express as a length attribute in pa.md.  */
4746
int
4747
pa_adjust_insn_length (rtx insn, int length)
4748
{
4749
  rtx pat = PATTERN (insn);
4750
 
4751
  /* Jumps inside switch tables which have unfilled delay slots need
4752
     adjustment.  */
4753
  if (GET_CODE (insn) == JUMP_INSN
4754
      && GET_CODE (pat) == PARALLEL
4755
      && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
4756
    return 4;
4757
  /* Millicode insn with an unfilled delay slot.  */
4758
  else if (GET_CODE (insn) == INSN
4759
           && GET_CODE (pat) != SEQUENCE
4760
           && GET_CODE (pat) != USE
4761
           && GET_CODE (pat) != CLOBBER
4762
           && get_attr_type (insn) == TYPE_MILLI)
4763
    return 4;
4764
  /* Block move pattern.  */
4765
  else if (GET_CODE (insn) == INSN
4766
           && GET_CODE (pat) == PARALLEL
4767
           && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4768
           && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4769
           && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4770
           && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4771
           && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
4772
    return compute_movmem_length (insn) - 4;
4773
  /* Block clear pattern.  */
4774
  else if (GET_CODE (insn) == INSN
4775
           && GET_CODE (pat) == PARALLEL
4776
           && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4777
           && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4778
           && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
4779
           && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
4780
    return compute_clrmem_length (insn) - 4;
4781
  /* Conditional branch with an unfilled delay slot.  */
4782
  else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4783
    {
4784
      /* Adjust a short backwards conditional with an unfilled delay slot.  */
4785
      if (GET_CODE (pat) == SET
4786
          && length == 4
4787
          && JUMP_LABEL (insn) != NULL_RTX
4788
          && ! forward_branch_p (insn))
4789
        return 4;
4790
      else if (GET_CODE (pat) == PARALLEL
4791
               && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4792
               && length == 4)
4793
        return 4;
4794
      /* Adjust dbra insn with short backwards conditional branch with
4795
         unfilled delay slot -- only for case where counter is in a
4796
         general register register.  */
4797
      else if (GET_CODE (pat) == PARALLEL
4798
               && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4799
               && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
4800
               && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
4801
               && length == 4
4802
               && ! forward_branch_p (insn))
4803
        return 4;
4804
      else
4805
        return 0;
4806
    }
4807
  return 0;
4808
}
4809
 
4810
/* Print operand X (an rtx) in assembler syntax to file FILE.
4811
   CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4812
   For `%' followed by punctuation, CODE is the punctuation and X is null.  */
4813
 
4814
void
4815
print_operand (FILE *file, rtx x, int code)
4816
{
4817
  switch (code)
4818
    {
4819
    case '#':
4820
      /* Output a 'nop' if there's nothing for the delay slot.  */
4821
      if (dbr_sequence_length () == 0)
4822
        fputs ("\n\tnop", file);
4823
      return;
4824
    case '*':
4825
      /* Output a nullification completer if there's nothing for the */
4826
      /* delay slot or nullification is requested.  */
4827
      if (dbr_sequence_length () == 0 ||
4828
          (final_sequence &&
4829
           INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4830
        fputs (",n", file);
4831
      return;
4832
    case 'R':
4833
      /* Print out the second register name of a register pair.
4834
         I.e., R (6) => 7.  */
4835
      fputs (reg_names[REGNO (x) + 1], file);
4836
      return;
4837
    case 'r':
4838
      /* A register or zero.  */
4839
      if (x == const0_rtx
4840
          || (x == CONST0_RTX (DFmode))
4841
          || (x == CONST0_RTX (SFmode)))
4842
        {
4843
          fputs ("%r0", file);
4844
          return;
4845
        }
4846
      else
4847
        break;
4848
    case 'f':
4849
      /* A register or zero (floating point).  */
4850
      if (x == const0_rtx
4851
          || (x == CONST0_RTX (DFmode))
4852
          || (x == CONST0_RTX (SFmode)))
4853
        {
4854
          fputs ("%fr0", file);
4855
          return;
4856
        }
4857
      else
4858
        break;
4859
    case 'A':
4860
      {
4861
        rtx xoperands[2];
4862
 
4863
        xoperands[0] = XEXP (XEXP (x, 0), 0);
4864
        xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4865
        output_global_address (file, xoperands[1], 0);
4866
        fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4867
        return;
4868
      }
4869
 
4870
    case 'C':                   /* Plain (C)ondition */
4871
    case 'X':
4872
      switch (GET_CODE (x))
4873
        {
4874
        case EQ:
4875
          fputs ("=", file);  break;
4876
        case NE:
4877
          fputs ("<>", file);  break;
4878
        case GT:
4879
          fputs (">", file);  break;
4880
        case GE:
4881
          fputs (">=", file);  break;
4882
        case GEU:
4883
          fputs (">>=", file);  break;
4884
        case GTU:
4885
          fputs (">>", file);  break;
4886
        case LT:
4887
          fputs ("<", file);  break;
4888
        case LE:
4889
          fputs ("<=", file);  break;
4890
        case LEU:
4891
          fputs ("<<=", file);  break;
4892
        case LTU:
4893
          fputs ("<<", file);  break;
4894
        default:
4895
          gcc_unreachable ();
4896
        }
4897
      return;
4898
    case 'N':                   /* Condition, (N)egated */
4899
      switch (GET_CODE (x))
4900
        {
4901
        case EQ:
4902
          fputs ("<>", file);  break;
4903
        case NE:
4904
          fputs ("=", file);  break;
4905
        case GT:
4906
          fputs ("<=", file);  break;
4907
        case GE:
4908
          fputs ("<", file);  break;
4909
        case GEU:
4910
          fputs ("<<", file);  break;
4911
        case GTU:
4912
          fputs ("<<=", file);  break;
4913
        case LT:
4914
          fputs (">=", file);  break;
4915
        case LE:
4916
          fputs (">", file);  break;
4917
        case LEU:
4918
          fputs (">>", file);  break;
4919
        case LTU:
4920
          fputs (">>=", file);  break;
4921
        default:
4922
          gcc_unreachable ();
4923
        }
4924
      return;
4925
    /* For floating point comparisons.  Note that the output
4926
       predicates are the complement of the desired mode.  The
4927
       conditions for GT, GE, LT, LE and LTGT cause an invalid
4928
       operation exception if the result is unordered and this
4929
       exception is enabled in the floating-point status register.  */
4930
    case 'Y':
4931
      switch (GET_CODE (x))
4932
        {
4933
        case EQ:
4934
          fputs ("!=", file);  break;
4935
        case NE:
4936
          fputs ("=", file);  break;
4937
        case GT:
4938
          fputs ("!>", file);  break;
4939
        case GE:
4940
          fputs ("!>=", file);  break;
4941
        case LT:
4942
          fputs ("!<", file);  break;
4943
        case LE:
4944
          fputs ("!<=", file);  break;
4945
        case LTGT:
4946
          fputs ("!<>", file);  break;
4947
        case UNLE:
4948
          fputs ("!?<=", file);  break;
4949
        case UNLT:
4950
          fputs ("!?<", file);  break;
4951
        case UNGE:
4952
          fputs ("!?>=", file);  break;
4953
        case UNGT:
4954
          fputs ("!?>", file);  break;
4955
        case UNEQ:
4956
          fputs ("!?=", file);  break;
4957
        case UNORDERED:
4958
          fputs ("!?", file);  break;
4959
        case ORDERED:
4960
          fputs ("?", file);  break;
4961
        default:
4962
          gcc_unreachable ();
4963
        }
4964
      return;
4965
    case 'S':                   /* Condition, operands are (S)wapped.  */
4966
      switch (GET_CODE (x))
4967
        {
4968
        case EQ:
4969
          fputs ("=", file);  break;
4970
        case NE:
4971
          fputs ("<>", file);  break;
4972
        case GT:
4973
          fputs ("<", file);  break;
4974
        case GE:
4975
          fputs ("<=", file);  break;
4976
        case GEU:
4977
          fputs ("<<=", file);  break;
4978
        case GTU:
4979
          fputs ("<<", file);  break;
4980
        case LT:
4981
          fputs (">", file);  break;
4982
        case LE:
4983
          fputs (">=", file);  break;
4984
        case LEU:
4985
          fputs (">>=", file);  break;
4986
        case LTU:
4987
          fputs (">>", file);  break;
4988
        default:
4989
          gcc_unreachable ();
4990
        }
4991
      return;
4992
    case 'B':                   /* Condition, (B)oth swapped and negate.  */
4993
      switch (GET_CODE (x))
4994
        {
4995
        case EQ:
4996
          fputs ("<>", file);  break;
4997
        case NE:
4998
          fputs ("=", file);  break;
4999
        case GT:
5000
          fputs (">=", file);  break;
5001
        case GE:
5002
          fputs (">", file);  break;
5003
        case GEU:
5004
          fputs (">>", file);  break;
5005
        case GTU:
5006
          fputs (">>=", file);  break;
5007
        case LT:
5008
          fputs ("<=", file);  break;
5009
        case LE:
5010
          fputs ("<", file);  break;
5011
        case LEU:
5012
          fputs ("<<", file);  break;
5013
        case LTU:
5014
          fputs ("<<=", file);  break;
5015
        default:
5016
          gcc_unreachable ();
5017
        }
5018
      return;
5019
    case 'k':
5020
      gcc_assert (GET_CODE (x) == CONST_INT);
5021
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5022
      return;
5023
    case 'Q':
5024
      gcc_assert (GET_CODE (x) == CONST_INT);
5025
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5026
      return;
5027
    case 'L':
5028
      gcc_assert (GET_CODE (x) == CONST_INT);
5029
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5030
      return;
5031
    case 'O':
5032
      gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5033
      fprintf (file, "%d", exact_log2 (INTVAL (x)));
5034
      return;
5035
    case 'p':
5036
      gcc_assert (GET_CODE (x) == CONST_INT);
5037
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5038
      return;
5039
    case 'P':
5040
      gcc_assert (GET_CODE (x) == CONST_INT);
5041
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5042
      return;
5043
    case 'I':
5044
      if (GET_CODE (x) == CONST_INT)
5045
        fputs ("i", file);
5046
      return;
5047
    case 'M':
5048
    case 'F':
5049
      switch (GET_CODE (XEXP (x, 0)))
5050
        {
5051
        case PRE_DEC:
5052
        case PRE_INC:
5053
          if (ASSEMBLER_DIALECT == 0)
5054
            fputs ("s,mb", file);
5055
          else
5056
            fputs (",mb", file);
5057
          break;
5058
        case POST_DEC:
5059
        case POST_INC:
5060
          if (ASSEMBLER_DIALECT == 0)
5061
            fputs ("s,ma", file);
5062
          else
5063
            fputs (",ma", file);
5064
          break;
5065
        case PLUS:
5066
          if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5067
              && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5068
            {
5069
              if (ASSEMBLER_DIALECT == 0)
5070
                fputs ("x", file);
5071
            }
5072
          else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5073
                   || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5074
            {
5075
              if (ASSEMBLER_DIALECT == 0)
5076
                fputs ("x,s", file);
5077
              else
5078
                fputs (",s", file);
5079
            }
5080
          else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5081
            fputs ("s", file);
5082
          break;
5083
        default:
5084
          if (code == 'F' && ASSEMBLER_DIALECT == 0)
5085
            fputs ("s", file);
5086
          break;
5087
        }
5088
      return;
5089
    case 'G':
5090
      output_global_address (file, x, 0);
5091
      return;
5092
    case 'H':
5093
      output_global_address (file, x, 1);
5094
      return;
5095
    case 0:                      /* Don't do anything special */
5096
      break;
5097
    case 'Z':
5098
      {
5099
        unsigned op[3];
5100
        compute_zdepwi_operands (INTVAL (x), op);
5101
        fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5102
        return;
5103
      }
5104
    case 'z':
5105
      {
5106
        unsigned op[3];
5107
        compute_zdepdi_operands (INTVAL (x), op);
5108
        fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5109
        return;
5110
      }
5111
    case 'c':
5112
      /* We can get here from a .vtable_inherit due to our
5113
         CONSTANT_ADDRESS_P rejecting perfectly good constant
5114
         addresses.  */
5115
      break;
5116
    default:
5117
      gcc_unreachable ();
5118
    }
5119
  if (GET_CODE (x) == REG)
5120
    {
5121
      fputs (reg_names [REGNO (x)], file);
5122
      if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5123
        {
5124
          fputs ("R", file);
5125
          return;
5126
        }
5127
      if (FP_REG_P (x)
5128
          && GET_MODE_SIZE (GET_MODE (x)) <= 4
5129
          && (REGNO (x) & 1) == 0)
5130
        fputs ("L", file);
5131
    }
5132
  else if (GET_CODE (x) == MEM)
5133
    {
5134
      int size = GET_MODE_SIZE (GET_MODE (x));
5135
      rtx base = NULL_RTX;
5136
      switch (GET_CODE (XEXP (x, 0)))
5137
        {
5138
        case PRE_DEC:
5139
        case POST_DEC:
5140
          base = XEXP (XEXP (x, 0), 0);
5141
          fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5142
          break;
5143
        case PRE_INC:
5144
        case POST_INC:
5145
          base = XEXP (XEXP (x, 0), 0);
5146
          fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5147
          break;
5148
        case PLUS:
5149
          if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5150
            fprintf (file, "%s(%s)",
5151
                     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5152
                     reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5153
          else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5154
            fprintf (file, "%s(%s)",
5155
                     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5156
                     reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5157
          else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5158
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5159
            {
5160
              /* Because the REG_POINTER flag can get lost during reload,
5161
                 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
5162
                 index and base registers in the combined move patterns.  */
5163
              rtx base = XEXP (XEXP (x, 0), 1);
5164
              rtx index = XEXP (XEXP (x, 0), 0);
5165
 
5166
              fprintf (file, "%s(%s)",
5167
                       reg_names [REGNO (index)], reg_names [REGNO (base)]);
5168
            }
5169
          else
5170
            output_address (XEXP (x, 0));
5171
          break;
5172
        default:
5173
          output_address (XEXP (x, 0));
5174
          break;
5175
        }
5176
    }
5177
  else
5178
    output_addr_const (file, x);
5179
}
5180
 
5181
/* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF.  */
5182
 
5183
void
5184
output_global_address (FILE *file, rtx x, int round_constant)
5185
{
5186
 
5187
  /* Imagine  (high (const (plus ...))).  */
5188
  if (GET_CODE (x) == HIGH)
5189
    x = XEXP (x, 0);
5190
 
5191
  if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5192
    output_addr_const (file, x);
5193
  else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5194
    {
5195
      output_addr_const (file, x);
5196
      fputs ("-$global$", file);
5197
    }
5198
  else if (GET_CODE (x) == CONST)
5199
    {
5200
      const char *sep = "";
5201
      int offset = 0;            /* assembler wants -$global$ at end */
5202
      rtx base = NULL_RTX;
5203
 
5204
      switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5205
        {
5206
        case SYMBOL_REF:
5207
          base = XEXP (XEXP (x, 0), 0);
5208
          output_addr_const (file, base);
5209
          break;
5210
        case CONST_INT:
5211
          offset = INTVAL (XEXP (XEXP (x, 0), 0));
5212
          break;
5213
        default:
5214
          gcc_unreachable ();
5215
        }
5216
 
5217
      switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5218
        {
5219
        case SYMBOL_REF:
5220
          base = XEXP (XEXP (x, 0), 1);
5221
          output_addr_const (file, base);
5222
          break;
5223
        case CONST_INT:
5224
          offset = INTVAL (XEXP (XEXP (x, 0), 1));
5225
          break;
5226
        default:
5227
          gcc_unreachable ();
5228
        }
5229
 
5230
      /* How bogus.  The compiler is apparently responsible for
5231
         rounding the constant if it uses an LR field selector.
5232
 
5233
         The linker and/or assembler seem a better place since
5234
         they have to do this kind of thing already.
5235
 
5236
         If we fail to do this, HP's optimizing linker may eliminate
5237
         an addil, but not update the ldw/stw/ldo instruction that
5238
         uses the result of the addil.  */
5239
      if (round_constant)
5240
        offset = ((offset + 0x1000) & ~0x1fff);
5241
 
5242
      switch (GET_CODE (XEXP (x, 0)))
5243
        {
5244
        case PLUS:
5245
          if (offset < 0)
5246
            {
5247
              offset = -offset;
5248
              sep = "-";
5249
            }
5250
          else
5251
            sep = "+";
5252
          break;
5253
 
5254
        case MINUS:
5255
          gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5256
          sep = "-";
5257
          break;
5258
 
5259
        default:
5260
          gcc_unreachable ();
5261
        }
5262
 
5263
      if (!read_only_operand (base, VOIDmode) && !flag_pic)
5264
        fputs ("-$global$", file);
5265
      if (offset)
5266
        fprintf (file, "%s%d", sep, offset);
5267
    }
5268
  else
5269
    output_addr_const (file, x);
5270
}
5271
 
5272
/* Output boilerplate text to appear at the beginning of the file.
5273
   There are several possible versions.  */
5274
#define aputs(x) fputs(x, asm_out_file)
5275
static inline void
5276
pa_file_start_level (void)
5277
{
5278
  if (TARGET_64BIT)
5279
    aputs ("\t.LEVEL 2.0w\n");
5280
  else if (TARGET_PA_20)
5281
    aputs ("\t.LEVEL 2.0\n");
5282
  else if (TARGET_PA_11)
5283
    aputs ("\t.LEVEL 1.1\n");
5284
  else
5285
    aputs ("\t.LEVEL 1.0\n");
5286
}
5287
 
5288
static inline void
5289
pa_file_start_space (int sortspace)
5290
{
5291
  aputs ("\t.SPACE $PRIVATE$");
5292
  if (sortspace)
5293
    aputs (",SORT=16");
5294
  aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
5295
         "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5296
         "\n\t.SPACE $TEXT$");
5297
  if (sortspace)
5298
    aputs (",SORT=8");
5299
  aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5300
         "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5301
}
5302
 
5303
static inline void
5304
pa_file_start_file (int want_version)
5305
{
5306
  if (write_symbols != NO_DEBUG)
5307
    {
5308
      output_file_directive (asm_out_file, main_input_filename);
5309
      if (want_version)
5310
        aputs ("\t.version\t\"01.01\"\n");
5311
    }
5312
}
5313
 
5314
static inline void
5315
pa_file_start_mcount (const char *aswhat)
5316
{
5317
  if (profile_flag)
5318
    fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5319
}
5320
 
5321
static void
5322
pa_elf_file_start (void)
5323
{
5324
  pa_file_start_level ();
5325
  pa_file_start_mcount ("ENTRY");
5326
  pa_file_start_file (0);
5327
}
5328
 
5329
static void
5330
pa_som_file_start (void)
5331
{
5332
  pa_file_start_level ();
5333
  pa_file_start_space (0);
5334
  aputs ("\t.IMPORT $global$,DATA\n"
5335
         "\t.IMPORT $$dyncall,MILLICODE\n");
5336
  pa_file_start_mcount ("CODE");
5337
  pa_file_start_file (0);
5338
}
5339
 
5340
static void
5341
pa_linux_file_start (void)
5342
{
5343
  pa_file_start_file (1);
5344
  pa_file_start_level ();
5345
  pa_file_start_mcount ("CODE");
5346
}
5347
 
5348
static void
5349
pa_hpux64_gas_file_start (void)
5350
{
5351
  pa_file_start_level ();
5352
#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5353
  if (profile_flag)
5354
    ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5355
#endif
5356
  pa_file_start_file (1);
5357
}
5358
 
5359
static void
5360
pa_hpux64_hpas_file_start (void)
5361
{
5362
  pa_file_start_level ();
5363
  pa_file_start_space (1);
5364
  pa_file_start_mcount ("CODE");
5365
  pa_file_start_file (0);
5366
}
5367
#undef aputs
5368
 
5369
/* Search the deferred plabel list for SYMBOL and return its internal
5370
   label.  If an entry for SYMBOL is not found, a new entry is created.  */
5371
 
5372
rtx
5373
get_deferred_plabel (rtx symbol)
5374
{
5375
  const char *fname = XSTR (symbol, 0);
5376
  size_t i;
5377
 
5378
  /* See if we have already put this function on the list of deferred
5379
     plabels.  This list is generally small, so a liner search is not
5380
     too ugly.  If it proves too slow replace it with something faster.  */
5381
  for (i = 0; i < n_deferred_plabels; i++)
5382
    if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5383
      break;
5384
 
5385
  /* If the deferred plabel list is empty, or this entry was not found
5386
     on the list, create a new entry on the list.  */
5387
  if (deferred_plabels == NULL || i == n_deferred_plabels)
5388
    {
5389
      tree id;
5390
 
5391
      if (deferred_plabels == 0)
5392
        deferred_plabels = (struct deferred_plabel *)
5393
          ggc_alloc (sizeof (struct deferred_plabel));
5394
      else
5395
        deferred_plabels = (struct deferred_plabel *)
5396
          ggc_realloc (deferred_plabels,
5397
                       ((n_deferred_plabels + 1)
5398
                        * sizeof (struct deferred_plabel)));
5399
 
5400
      i = n_deferred_plabels++;
5401
      deferred_plabels[i].internal_label = gen_label_rtx ();
5402
      deferred_plabels[i].symbol = symbol;
5403
 
5404
      /* Gross.  We have just implicitly taken the address of this
5405
         function.  Mark it in the same manner as assemble_name.  */
5406
      id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5407
      if (id)
5408
        mark_referenced (id);
5409
    }
5410
 
5411
  return deferred_plabels[i].internal_label;
5412
}
5413
 
5414
static void
5415
output_deferred_plabels (void)
5416
{
5417
  size_t i;
5418
 
5419
  /* If we have some deferred plabels, then we need to switch into the
5420
     data or readonly data section, and align it to a 4 byte boundary
5421
     before outputting the deferred plabels.  */
5422
  if (n_deferred_plabels)
5423
    {
5424
      switch_to_section (flag_pic ? data_section : readonly_data_section);
5425
      ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5426
    }
5427
 
5428
  /* Now output the deferred plabels.  */
5429
  for (i = 0; i < n_deferred_plabels; i++)
5430
    {
5431
      targetm.asm_out.internal_label (asm_out_file, "L",
5432
                 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5433
      assemble_integer (deferred_plabels[i].symbol,
5434
                        TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5435
    }
5436
}
5437
 
5438
#ifdef HPUX_LONG_DOUBLE_LIBRARY
5439
/* Initialize optabs to point to HPUX long double emulation routines.  */
5440
static void
5441
pa_hpux_init_libfuncs (void)
5442
{
5443
  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5444
  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5445
  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5446
  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5447
  set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5448
  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5449
  set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5450
  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5451
  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5452
 
5453
  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5454
  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5455
  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5456
  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5457
  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5458
  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5459
  set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5460
 
5461
  set_conv_libfunc (sext_optab,   TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5462
  set_conv_libfunc (sext_optab,   TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5463
  set_conv_libfunc (trunc_optab,  SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5464
  set_conv_libfunc (trunc_optab,  DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5465
 
5466
  set_conv_libfunc (sfix_optab,   SImode, TFmode, TARGET_64BIT
5467
                                                  ? "__U_Qfcnvfxt_quad_to_sgl"
5468
                                                  : "_U_Qfcnvfxt_quad_to_sgl");
5469
  set_conv_libfunc (sfix_optab,   DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
5470
  set_conv_libfunc (ufix_optab,   SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl");
5471
  set_conv_libfunc (ufix_optab,   DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl");
5472
 
5473
  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
5474
  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
5475
  set_conv_libfunc (ufloat_optab, TFmode, SImode, "_U_Qfcnvxf_usgl_to_quad");
5476
  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxf_udbl_to_quad");
5477
}
5478
#endif
5479
 
5480
/* HP's millicode routines mean something special to the assembler.
5481
   Keep track of which ones we have used.  */
5482
 
5483
enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5484
static void import_milli (enum millicodes);
5485
static char imported[(int) end1000];
5486
static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5487
static const char import_string[] = ".IMPORT $$....,MILLICODE";
5488
#define MILLI_START 10
5489
 
5490
static void
5491
import_milli (enum millicodes code)
5492
{
5493
  char str[sizeof (import_string)];
5494
 
5495
  if (!imported[(int) code])
5496
    {
5497
      imported[(int) code] = 1;
5498
      strcpy (str, import_string);
5499
      strncpy (str + MILLI_START, milli_names[(int) code], 4);
5500
      output_asm_insn (str, 0);
5501
    }
5502
}
5503
 
5504
/* The register constraints have put the operands and return value in
5505
   the proper registers.  */
5506
 
5507
const char *
5508
output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
5509
{
5510
  import_milli (mulI);
5511
  return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5512
}
5513
 
5514
/* Emit the rtl for doing a division by a constant.  */
5515
 
5516
/* Do magic division millicodes exist for this value? */
5517
const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5518
 
5519
/* We'll use an array to keep track of the magic millicodes and
5520
   whether or not we've used them already. [n][0] is signed, [n][1] is
5521
   unsigned.  */
5522
 
5523
static int div_milli[16][2];
5524
 
5525
int
5526
emit_hpdiv_const (rtx *operands, int unsignedp)
5527
{
5528
  if (GET_CODE (operands[2]) == CONST_INT
5529
      && INTVAL (operands[2]) > 0
5530
      && INTVAL (operands[2]) < 16
5531
      && magic_milli[INTVAL (operands[2])])
5532
    {
5533
      rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5534
 
5535
      emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5536
      emit
5537
        (gen_rtx_PARALLEL
5538
         (VOIDmode,
5539
          gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
5540
                                     gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5541
                                                     SImode,
5542
                                                     gen_rtx_REG (SImode, 26),
5543
                                                     operands[2])),
5544
                     gen_rtx_CLOBBER (VOIDmode, operands[4]),
5545
                     gen_rtx_CLOBBER (VOIDmode, operands[3]),
5546
                     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5547
                     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5548
                     gen_rtx_CLOBBER (VOIDmode, ret))));
5549
      emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5550
      return 1;
5551
    }
5552
  return 0;
5553
}
5554
 
5555
const char *
5556
output_div_insn (rtx *operands, int unsignedp, rtx insn)
5557
{
5558
  int divisor;
5559
 
5560
  /* If the divisor is a constant, try to use one of the special
5561
     opcodes .*/
5562
  if (GET_CODE (operands[0]) == CONST_INT)
5563
    {
5564
      static char buf[100];
5565
      divisor = INTVAL (operands[0]);
5566
      if (!div_milli[divisor][unsignedp])
5567
        {
5568
          div_milli[divisor][unsignedp] = 1;
5569
          if (unsignedp)
5570
            output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5571
          else
5572
            output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5573
        }
5574
      if (unsignedp)
5575
        {
5576
          sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5577
                   INTVAL (operands[0]));
5578
          return output_millicode_call (insn,
5579
                                        gen_rtx_SYMBOL_REF (SImode, buf));
5580
        }
5581
      else
5582
        {
5583
          sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5584
                   INTVAL (operands[0]));
5585
          return output_millicode_call (insn,
5586
                                        gen_rtx_SYMBOL_REF (SImode, buf));
5587
        }
5588
    }
5589
  /* Divisor isn't a special constant.  */
5590
  else
5591
    {
5592
      if (unsignedp)
5593
        {
5594
          import_milli (divU);
5595
          return output_millicode_call (insn,
5596
                                        gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5597
        }
5598
      else
5599
        {
5600
          import_milli (divI);
5601
          return output_millicode_call (insn,
5602
                                        gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5603
        }
5604
    }
5605
}
5606
 
5607
/* Output a $$rem millicode to do mod.  */
5608
 
5609
const char *
5610
output_mod_insn (int unsignedp, rtx insn)
5611
{
5612
  if (unsignedp)
5613
    {
5614
      import_milli (remU);
5615
      return output_millicode_call (insn,
5616
                                    gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5617
    }
5618
  else
5619
    {
5620
      import_milli (remI);
5621
      return output_millicode_call (insn,
5622
                                    gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5623
    }
5624
}
5625
 
5626
void
5627
output_arg_descriptor (rtx call_insn)
5628
{
5629
  const char *arg_regs[4];
5630
  enum machine_mode arg_mode;
5631
  rtx link;
5632
  int i, output_flag = 0;
5633
  int regno;
5634
 
5635
  /* We neither need nor want argument location descriptors for the
5636
     64bit runtime environment or the ELF32 environment.  */
5637
  if (TARGET_64BIT || TARGET_ELF32)
5638
    return;
5639
 
5640
  for (i = 0; i < 4; i++)
5641
    arg_regs[i] = 0;
5642
 
5643
  /* Specify explicitly that no argument relocations should take place
5644
     if using the portable runtime calling conventions.  */
5645
  if (TARGET_PORTABLE_RUNTIME)
5646
    {
5647
      fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5648
             asm_out_file);
5649
      return;
5650
    }
5651
 
5652
  gcc_assert (GET_CODE (call_insn) == CALL_INSN);
5653
  for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5654
       link; link = XEXP (link, 1))
5655
    {
5656
      rtx use = XEXP (link, 0);
5657
 
5658
      if (! (GET_CODE (use) == USE
5659
             && GET_CODE (XEXP (use, 0)) == REG
5660
             && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5661
        continue;
5662
 
5663
      arg_mode = GET_MODE (XEXP (use, 0));
5664
      regno = REGNO (XEXP (use, 0));
5665
      if (regno >= 23 && regno <= 26)
5666
        {
5667
          arg_regs[26 - regno] = "GR";
5668
          if (arg_mode == DImode)
5669
            arg_regs[25 - regno] = "GR";
5670
        }
5671
      else if (regno >= 32 && regno <= 39)
5672
        {
5673
          if (arg_mode == SFmode)
5674
            arg_regs[(regno - 32) / 2] = "FR";
5675
          else
5676
            {
5677
#ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5678
              arg_regs[(regno - 34) / 2] = "FR";
5679
              arg_regs[(regno - 34) / 2 + 1] = "FU";
5680
#else
5681
              arg_regs[(regno - 34) / 2] = "FU";
5682
              arg_regs[(regno - 34) / 2 + 1] = "FR";
5683
#endif
5684
            }
5685
        }
5686
    }
5687
  fputs ("\t.CALL ", asm_out_file);
5688
  for (i = 0; i < 4; i++)
5689
    {
5690
      if (arg_regs[i])
5691
        {
5692
          if (output_flag++)
5693
            fputc (',', asm_out_file);
5694
          fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5695
        }
5696
    }
5697
  fputc ('\n', asm_out_file);
5698
}
5699
 
5700
static enum reg_class
5701
pa_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
5702
                     enum machine_mode mode, secondary_reload_info *sri)
5703
{
5704
  int is_symbolic, regno;
5705
 
5706
  /* Handle the easy stuff first.  */
5707
  if (rclass == R1_REGS)
5708
    return NO_REGS;
5709
 
5710
  if (REG_P (x))
5711
    {
5712
      regno = REGNO (x);
5713
      if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
5714
        return NO_REGS;
5715
    }
5716
  else
5717
    regno = -1;
5718
 
5719
  /* If we have something like (mem (mem (...)), we can safely assume the
5720
     inner MEM will end up in a general register after reloading, so there's
5721
     no need for a secondary reload.  */
5722
  if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
5723
    return NO_REGS;
5724
 
5725
  /* Trying to load a constant into a FP register during PIC code
5726
     generation requires %r1 as a scratch register.  */
5727
  if (flag_pic
5728
      && (mode == SImode || mode == DImode)
5729
      && FP_REG_CLASS_P (rclass)
5730
      && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
5731
    {
5732
      sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5733
                    : CODE_FOR_reload_indi_r1);
5734
      return NO_REGS;
5735
    }
5736
 
5737
  /* Profiling showed the PA port spends about 1.3% of its compilation
5738
     time in true_regnum from calls inside pa_secondary_reload_class.  */
5739
  if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
5740
    regno = true_regnum (x);
5741
 
5742
  /* In order to allow 14-bit displacements in integer loads and stores,
5743
     we need to prevent reload from generating out of range integer mode
5744
     loads and stores to the floating point registers.  Previously, we
5745
     used to call for a secondary reload and have emit_move_sequence()
5746
     fix the instruction sequence.  However, reload occasionally wouldn't
5747
     generate the reload and we would end up with an invalid REG+D memory
5748
     address.  So, now we use an intermediate general register for most
5749
     memory loads and stores.  */
5750
  if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5751
      && GET_MODE_CLASS (mode) == MODE_INT
5752
      && FP_REG_CLASS_P (rclass))
5753
    {
5754
      /* Reload passes (mem:SI (reg/f:DI 30 %r30) when it wants to check
5755
         the secondary reload needed for a pseudo.  It never passes a
5756
         REG+D address.  */
5757
      if (GET_CODE (x) == MEM)
5758
        {
5759
          x = XEXP (x, 0);
5760
 
5761
          /* We don't need an intermediate for indexed and LO_SUM DLT
5762
             memory addresses.  When INT14_OK_STRICT is true, it might
5763
             appear that we could directly allow register indirect
5764
             memory addresses.  However, this doesn't work because we
5765
             don't support SUBREGs in floating-point register copies
5766
             and reload doesn't tell us when it's going to use a SUBREG.  */
5767
          if (IS_INDEX_ADDR_P (x)
5768
              || IS_LO_SUM_DLT_ADDR_P (x))
5769
            return NO_REGS;
5770
 
5771
          /* Otherwise, we need an intermediate general register.  */
5772
          return GENERAL_REGS;
5773
        }
5774
 
5775
      /* Request a secondary reload with a general scratch register
5776
         for everthing else.  ??? Could symbolic operands be handled
5777
         directly when generating non-pic PA 2.0 code?  */
5778
      sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
5779
      return NO_REGS;
5780
    }
5781
 
5782
  /* We need a secondary register (GPR) for copies between the SAR
5783
     and anything other than a general register.  */
5784
  if (rclass == SHIFT_REGS && (regno <= 0 || regno >= 32))
5785
    {
5786
      sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
5787
      return NO_REGS;
5788
    }
5789
 
5790
  /* A SAR<->FP register copy requires a secondary register (GPR) as
5791
     well as secondary memory.  */
5792
  if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
5793
      && (REGNO_REG_CLASS (regno) == SHIFT_REGS
5794
      && FP_REG_CLASS_P (rclass)))
5795
    {
5796
      sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
5797
      return NO_REGS;
5798
    }
5799
 
5800
  /* Secondary reloads of symbolic operands require %r1 as a scratch
5801
     register when we're generating PIC code and when the operand isn't
5802
     readonly.  */
5803
  if (GET_CODE (x) == HIGH)
5804
    x = XEXP (x, 0);
5805
 
5806
  /* Profiling has showed GCC spends about 2.6% of its compilation
5807
     time in symbolic_operand from calls inside pa_secondary_reload_class.
5808
     So, we use an inline copy to avoid useless work.  */
5809
  switch (GET_CODE (x))
5810
    {
5811
      rtx op;
5812
 
5813
      case SYMBOL_REF:
5814
        is_symbolic = !SYMBOL_REF_TLS_MODEL (x);
5815
        break;
5816
      case LABEL_REF:
5817
        is_symbolic = 1;
5818
        break;
5819
      case CONST:
5820
        op = XEXP (x, 0);
5821
        is_symbolic = (GET_CODE (op) == PLUS
5822
                       && ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
5823
                            && !SYMBOL_REF_TLS_MODEL (XEXP (op, 0)))
5824
                           || GET_CODE (XEXP (op, 0)) == LABEL_REF)
5825
                       && GET_CODE (XEXP (op, 1)) == CONST_INT);
5826
        break;
5827
      default:
5828
        is_symbolic = 0;
5829
        break;
5830
    }
5831
 
5832
  if (is_symbolic && (flag_pic || !read_only_operand (x, VOIDmode)))
5833
    {
5834
      gcc_assert (mode == SImode || mode == DImode);
5835
      sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5836
                    : CODE_FOR_reload_indi_r1);
5837
    }
5838
 
5839
  return NO_REGS;
5840
}
5841
 
5842
/* Implement TARGET_EXTRA_LIVE_ON_ENTRY.  The argument pointer
5843
   is only marked as live on entry by df-scan when it is a fixed
5844
   register.  It isn't a fixed register in the 64-bit runtime,
5845
   so we need to mark it here.  */
5846
 
5847
static void
5848
pa_extra_live_on_entry (bitmap regs)
5849
{
5850
  if (TARGET_64BIT)
5851
    bitmap_set_bit (regs, ARG_POINTER_REGNUM);
5852
}
5853
 
5854
/* Implement EH_RETURN_HANDLER_RTX.  The MEM needs to be volatile
5855
   to prevent it from being deleted.  */
5856
 
5857
rtx
5858
pa_eh_return_handler_rtx (void)
5859
{
5860
  rtx tmp;
5861
 
5862
  tmp = gen_rtx_PLUS (word_mode, frame_pointer_rtx,
5863
                      TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
5864
  tmp = gen_rtx_MEM (word_mode, tmp);
5865
  tmp->volatil = 1;
5866
  return tmp;
5867
}

/* In the 32-bit runtime, arguments larger than eight bytes are passed
   by invisible reference.  As a GCC extension, we also pass anything
   with a zero or variable size by reference.

   The 64-bit runtime does not describe passing any types by invisible
   reference.  The internals of GCC can't currently handle passing
   empty structures, and zero or variable length arrays when they are
   not passed entirely on the stack or by reference.  Thus, as a GCC
   extension, we pass these types by reference.  The HP compiler doesn't
   support these types, so hopefully there shouldn't be any compatibility
   issues.  This may have to be revisited when HP releases a C99 compiler
   or updates the ABI.  */

static bool
pa_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
                      enum machine_mode mode, const_tree type,
                      bool named ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;

  if (type)
    size = int_size_in_bytes (type);
  else
    size = GET_MODE_SIZE (mode);

  if (TARGET_64BIT)
    return size <= 0;
  else
    return size <= 0 || size > 8;
}
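
/* Illustration (a minimal sketch, not from the original source, kept
   under #if 0 so it is never compiled): the rule above reduces to a
   simple predicate on the argument size in bytes, where a non-positive
   SIZE stands for zero or variable-sized types.  */
#if 0
static int
pa_pass_by_reference_sketch (long size, int target_64bit)
{
  if (target_64bit)
    return size <= 0;               /* only zero/variable-sized types */
  return size <= 0 || size > 8;     /* also anything over eight bytes */
}
#endif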

enum direction
function_arg_padding (enum machine_mode mode, const_tree type)
{
  if (mode == BLKmode
      || (TARGET_64BIT
          && type
          && (AGGREGATE_TYPE_P (type)
              || TREE_CODE (type) == COMPLEX_TYPE
              || TREE_CODE (type) == VECTOR_TYPE)))
    {
      /* Return none if justification is not required.  */
      if (type
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
        return none;

      /* The directions set here are ignored when a BLKmode argument larger
         than a word is placed in a register.  Different code is used for
         the stack and registers.  This makes it difficult to have a
         consistent data representation for both the stack and registers.
         For both runtimes, the justification and padding for arguments on
         the stack and in registers should be identical.  */
      if (TARGET_64BIT)
        /* The 64-bit runtime specifies left justification for aggregates.  */
        return upward;
      else
        /* The 32-bit runtime architecture specifies right justification.
           When the argument is passed on the stack, the argument is padded
           with garbage on the left.  The HP compiler pads with zeros.  */
        return downward;
    }

  if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
    return downward;
  else
    return none;
}
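
/* Illustration (a hypothetical sketch, not compiled): the aggregate
   path above collapses to the mapping below, where 'n', 'u' and 'd'
   stand for none, upward and downward.  For example, a 3-byte
   aggregate with a 32-bit PARM_BOUNDARY is right justified (downward)
   in the 32-bit runtime and left justified (upward) in the 64-bit
   runtime, while a 4-byte aggregate needs no justification.  */
#if 0
static char
aggregate_arg_padding_sketch (int size_bytes, int parm_boundary_bits,
                              int target_64bit)
{
  if ((size_bytes * 8) % parm_boundary_bits == 0)
    return 'n';                     /* justification not required */
  return target_64bit ? 'u' : 'd';  /* left vs. right justification */
}
#endif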


/* Do what is necessary for `va_start'.  We look at the current function
   to determine if stdargs or varargs is used and fill in an initial
   va_list.  A pointer to this constructor is returned.  */

static rtx
hppa_builtin_saveregs (void)
{
  rtx offset, dest;
  tree fntype = TREE_TYPE (current_function_decl);
  int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
                   && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
                       != void_type_node)))
                ? UNITS_PER_WORD : 0);

  if (argadj)
    offset = plus_constant (crtl->args.arg_offset_rtx, argadj);
  else
    offset = crtl->args.arg_offset_rtx;

  if (TARGET_64BIT)
    {
      int i, off;

      /* Adjust for varargs/stdarg differences.  */
      if (argadj)
        offset = plus_constant (crtl->args.arg_offset_rtx, -argadj);
      else
        offset = crtl->args.arg_offset_rtx;

      /* We need to save %r26 .. %r19 inclusive starting at offset -64
         from the incoming arg pointer and growing to larger addresses.  */
      for (i = 26, off = -64; i >= 19; i--, off += 8)
        emit_move_insn (gen_rtx_MEM (word_mode,
                                     plus_constant (arg_pointer_rtx, off)),
                        gen_rtx_REG (word_mode, i));

      /* The incoming args pointer points just beyond the flushback area;
         normally this is not a serious concern.  However, when we are doing
         varargs/stdargs we want to make the arg pointer point to the start
         of the incoming argument area.  */
      emit_move_insn (virtual_incoming_args_rtx,
                      plus_constant (arg_pointer_rtx, -64));

      /* Now return a pointer to the first anonymous argument.  */
      return copy_to_reg (expand_binop (Pmode, add_optab,
                                        virtual_incoming_args_rtx,
                                        offset, 0, 0, OPTAB_LIB_WIDEN));
    }

  /* Store general registers on the stack.  */
  dest = gen_rtx_MEM (BLKmode,
                      plus_constant (crtl->args.internal_arg_pointer,
                                     -16));
  set_mem_alias_set (dest, get_varargs_alias_set ());
  set_mem_align (dest, BITS_PER_WORD);
  move_block_from_reg (23, dest, 4);

  /* move_block_from_reg will emit code to store the argument registers
     individually as scalar stores.

     However, other insns may later load from the same addresses for
     a structure load (passing a struct to a varargs routine).

     The alias code assumes that such aliasing can never happen, so we
     have to keep memory referencing insns from moving up beyond the
     last argument register store.  So we emit a blockage insn here.  */
  emit_insn (gen_blockage ());

  return copy_to_reg (expand_binop (Pmode, add_optab,
                                    crtl->args.internal_arg_pointer,
                                    offset, 0, 0, OPTAB_LIB_WIDEN));
}
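
/* Illustration (a minimal sketch, not compiled): the 64-bit save loop
   above stores %r26 at offset -64 from the incoming arg pointer, %r25
   at -56, and so on up to %r19 at -8; that is, for REGNO in 19..26 the
   slot offset is given by the mapping below.  */
#if 0
static int
vararg_save_offset_sketch (int regno)
{
  return -64 + (26 - regno) * 8;    /* %r26 -> -64, ..., %r19 -> -8 */
}
#endif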

static void
hppa_va_start (tree valist, rtx nextarg)
{
  nextarg = expand_builtin_saveregs ();
  std_expand_builtin_va_start (valist, nextarg);
}

static tree
hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
                           gimple_seq *post_p)
{
  if (TARGET_64BIT)
    {
      /* Args grow upward.  We can use the generic routines.  */
      return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
    }
  else /* !TARGET_64BIT */
    {
      tree ptr = build_pointer_type (type);
      tree valist_type;
      tree t, u;
      unsigned int size, ofs;
      bool indirect;

      indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
      if (indirect)
        {
          type = ptr;
          ptr = build_pointer_type (type);
        }
      size = int_size_in_bytes (type);
      valist_type = TREE_TYPE (valist);

      /* Args grow down.  Not handled by generic routines.  */

      u = fold_convert (sizetype, size_in_bytes (type));
      u = fold_build1 (NEGATE_EXPR, sizetype, u);
      t = build2 (POINTER_PLUS_EXPR, valist_type, valist, u);

      /* Copied from va-pa.h, but we probably don't need to align to
         word size, since we generate and preserve that invariant.  */
      u = size_int (size > 4 ? -8 : -4);
      t = fold_convert (sizetype, t);
      t = build2 (BIT_AND_EXPR, sizetype, t, u);
      t = fold_convert (valist_type, t);

      t = build2 (MODIFY_EXPR, valist_type, valist, t);

      ofs = (8 - size) % 4;
      if (ofs != 0)
        {
          u = size_int (ofs);
          t = build2 (POINTER_PLUS_EXPR, valist_type, t, u);
        }

      t = fold_convert (ptr, t);
      t = build_va_arg_indirect_ref (t);

      if (indirect)
        t = build_va_arg_indirect_ref (t);

      return t;
    }
}
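
/* Illustration (a minimal sketch, not compiled): the 32-bit branch
   above amounts to the pointer arithmetic below, shown on a plain
   integer for clarity.  With SIZE == 2, for instance, the va_list
   pointer drops by 2, is rounded down to a 4-byte boundary, and the
   argument sits at offset (8 - 2) % 4 == 2 from that address, i.e.
   right justified within its slot.  */
#if 0
static unsigned long
pa32_va_arg_addr_sketch (unsigned long valist, unsigned long size)
{
  valist -= size;                       /* args grow downward */
  valist &= (size > 4 ? ~7UL : ~3UL);   /* align to 8 or 4 bytes */
  return valist + (8 - size) % 4;       /* right justify in the slot */
}
#endif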

/* True if MODE is valid for the target.  By "valid", we mean able to
   be manipulated in non-trivial ways.  In particular, this means all
   the arithmetic is supported.

   Currently, TImode is not valid as the HP 64-bit runtime documentation
   doesn't document the alignment and calling conventions for this type.
   Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
   2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE.  */

static bool
pa_scalar_mode_supported_p (enum machine_mode mode)
{
  int precision = GET_MODE_PRECISION (mode);

  switch (GET_MODE_CLASS (mode))
    {
    case MODE_PARTIAL_INT:
    case MODE_INT:
      if (precision == CHAR_TYPE_SIZE)
        return true;
      if (precision == SHORT_TYPE_SIZE)
        return true;
      if (precision == INT_TYPE_SIZE)
        return true;
      if (precision == LONG_TYPE_SIZE)
        return true;
      if (precision == LONG_LONG_TYPE_SIZE)
        return true;
      return false;

    case MODE_FLOAT:
      if (precision == FLOAT_TYPE_SIZE)
        return true;
      if (precision == DOUBLE_TYPE_SIZE)
        return true;
      if (precision == LONG_DOUBLE_TYPE_SIZE)
        return true;
      return false;

    case MODE_DECIMAL_FLOAT:
      return false;

    default:
      gcc_unreachable ();
    }
}

/* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
   it branches to the next real instruction.  Otherwise, return FALSE.  */

static bool
branch_to_delay_slot_p (rtx insn)
{
  if (dbr_sequence_length ())
    return FALSE;

  return next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn);
}

/* Return TRUE if INSN, a jump insn, needs a nop in its delay slot.

   This occurs when INSN has an unfilled delay slot and is followed
   by an ASM_INPUT.  Disaster can occur if the ASM_INPUT is empty and
   the jump branches into the delay slot.  So, we add a nop in the delay
   slot just to be safe.  This messes up our instruction count, but we
   don't know how big the ASM_INPUT insn is anyway.  */

static bool
branch_needs_nop_p (rtx insn)
{
  rtx next_insn;

  if (dbr_sequence_length ())
    return FALSE;

  next_insn = next_real_insn (insn);
  return GET_CODE (PATTERN (next_insn)) == ASM_INPUT;
}

/* This routine handles all the normal conditional branch sequences we
   might need to generate.  It handles compare immediate vs compare
   register, nullification of delay slots, varying length branches,
   negated branches, and all combinations of the above.  It returns the
   output appropriate to emit the branch corresponding to all given
   parameters.  */

const char *
output_cbranch (rtx *operands, int negated, rtx insn)
{
  static char buf[100];
  int useskip = 0;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot)
     is asking for a disaster.  This can happen when not optimizing and
     when jump optimization fails.

     While it is usually safe to emit nothing, this can fail if the
     preceding instruction is a nullified branch with an empty delay
     slot and the same branch target as this branch.  We could check
     for this but jump optimization should eliminate nop jumps.  It
     is always safe to emit a nop.  */
  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* The doubleword form of the cmpib instruction doesn't have the LEU
     and GTU conditions while the cmpb instruction does.  Since we accept
     zero for cmpb, we must ensure that we use cmpb for the comparison.  */
  if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
    operands[2] = gen_rtx_REG (DImode, 0);
  if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
    operands[1] = gen_rtx_REG (DImode, 0);

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with a
     comclr instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */
  if (length == 4
      && next_real_insn (insn) != 0
      && get_attr_length (next_real_insn (insn)) == 4
      && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
      && nullify)
    useskip = 1;

  switch (length)
    {
      /* All short conditional branches except backwards with an unfilled
         delay slot.  */
      case 4:
        if (useskip)
          strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
        else
          strcpy (buf, "{com%I2b,|cmp%I2b,}");
        if (GET_MODE (operands[1]) == DImode)
          strcat (buf, "*");
        if (negated)
          strcat (buf, "%B3");
        else
          strcat (buf, "%S3");
        if (useskip)
          strcat (buf, " %2,%r1,%%r0");
        else if (nullify)
          {
            if (branch_needs_nop_p (insn))
              strcat (buf, ",n %2,%r1,%0%#");
            else
              strcat (buf, ",n %2,%r1,%0");
          }
        else
          strcat (buf, " %2,%r1,%0");
        break;

     /* All long conditionals.  Note a short backward branch with an
        unfilled delay slot is treated just like a long backward branch
        with an unfilled delay slot.  */
      case 8:
        /* Handle weird backwards branch with a filled delay slot
           which is nullified.  */
        if (dbr_sequence_length () != 0
            && ! forward_branch_p (insn)
            && nullify)
          {
            strcpy (buf, "{com%I2b,|cmp%I2b,}");
            if (GET_MODE (operands[1]) == DImode)
              strcat (buf, "*");
            if (negated)
              strcat (buf, "%S3");
            else
              strcat (buf, "%B3");
            strcat (buf, ",n %2,%r1,.+12\n\tb %0");
          }
        /* Handle short backwards branch with an unfilled delay slot.
           Using a comb;nop rather than comiclr;bl saves 1 cycle for both
           taken and untaken branches.  */
        else if (dbr_sequence_length () == 0
                 && ! forward_branch_p (insn)
                 && INSN_ADDRESSES_SET_P ()
                 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
                                    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
          {
            strcpy (buf, "{com%I2b,|cmp%I2b,}");
            if (GET_MODE (operands[1]) == DImode)
              strcat (buf, "*");
            if (negated)
              strcat (buf, "%B3 %2,%r1,%0%#");
            else
              strcat (buf, "%S3 %2,%r1,%0%#");
          }
        else
          {
            strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
            if (GET_MODE (operands[1]) == DImode)
              strcat (buf, "*");
            if (negated)
              strcat (buf, "%S3");
            else
              strcat (buf, "%B3");
            if (nullify)
              strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
            else
              strcat (buf, " %2,%r1,%%r0\n\tb %0");
          }
        break;

      default:
        /* The reversed conditional branch must branch over one additional
           instruction if the delay slot is filled and needs to be extracted
           by output_lbranch.  If the delay slot is empty or this is a
           nullified forward branch, the instruction after the reversed
           condition branch must be nullified.  */
        if (dbr_sequence_length () == 0
            || (nullify && forward_branch_p (insn)))
          {
            nullify = 1;
            xdelay = 0;
            operands[4] = GEN_INT (length);
          }
        else
          {
            xdelay = 1;
            operands[4] = GEN_INT (length + 4);
          }

        /* Create a reversed conditional branch which branches around
           the following insns.  */
        if (GET_MODE (operands[1]) != DImode)
          {
            if (nullify)
              {
                if (negated)
                  strcpy (buf,
                    "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
                else
                  strcpy (buf,
                    "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
              }
            else
              {
                if (negated)
                  strcpy (buf,
                    "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
                else
                  strcpy (buf,
                    "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
              }
          }
        else
          {
            if (nullify)
              {
                if (negated)
                  strcpy (buf,
                    "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
                else
                  strcpy (buf,
                    "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
              }
            else
              {
                if (negated)
                  strcpy (buf,
                    "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
                else
                  strcpy (buf,
                    "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
              }
          }

        output_asm_insn (buf, operands);
        return output_lbranch (operands[0], insn, xdelay);
    }
  return buf;
}

/* This routine handles output of long unconditional branches that
   exceed the maximum range of a simple branch instruction.  Since
   we don't have a register available for the branch, we save register
   %r1 in the frame marker, load the branch destination DEST into %r1,
   execute the branch, and restore %r1 in the delay slot of the branch.

   Since long branches may have an insn in the delay slot and the
   delay slot is used to restore %r1, we in general need to extract
   this insn and execute it before the branch.  However, to facilitate
   use of this function by conditional branches, we also provide an
   option to not extract the delay insn so that it will be emitted
   after the long branch.  So, if there is an insn in the delay slot,
   it is extracted if XDELAY is nonzero.

   The lengths of the various long-branch sequences are 20, 16 and 24
   bytes for the portable runtime, non-PIC and PIC cases, respectively.  */

const char *
output_lbranch (rtx dest, rtx insn, int xdelay)
{
  rtx xoperands[2];

  xoperands[0] = dest;

  /* First, free up the delay slot.  */
  if (xdelay && dbr_sequence_length () != 0)
    {
      /* We can't handle a jump in the delay slot.  */
      gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN);

      final_scan_insn (NEXT_INSN (insn), asm_out_file,
                       optimize, 0, NULL);

      /* Now delete the delay insn.  */
      SET_INSN_DELETED (NEXT_INSN (insn));
    }

  /* Output an insn to save %r1.  The runtime documentation doesn't
     specify whether the "Clean Up" slot in the caller's frame can
     be clobbered by the callee.  It isn't copied by HP's builtin
     alloca, so this suggests that it can be clobbered if necessary.
     The "Static Link" location is copied by HP builtin alloca, so
     we avoid using it.  Using the cleanup slot might be a problem
     if we have to interoperate with languages that pass cleanup
     information.  However, it should be possible to handle these
     situations with GCC's asm feature.

     The "Current RP" slot is reserved for the called procedure, so
     we try to use it when we don't have a frame of our own.  It's
     rather unlikely that we won't have a frame when we need to emit
     a very long branch.

     Really the way to go long term is a register scavenger; go to
     the target of the jump and find a register which we can use
     as a scratch to hold the value in %r1.  Then, we wouldn't have
     to free up the delay slot or clobber a slot that may be needed
     for other purposes.  */
  if (TARGET_64BIT)
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
        /* Use the return pointer slot in the frame marker.  */
        output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
      else
        /* Use the slot at -40 in the frame marker since HP builtin
           alloca doesn't copy it.  */
        output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
    }
  else
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
        /* Use the return pointer slot in the frame marker.  */
        output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
      else
        /* Use the "Clean Up" slot in the frame marker.  In GCC,
           the only other use of this location is for copying a
           floating point double argument from a floating-point
           register to two general registers.  The copy is done
           as an "atomic" operation when outputting a call, so it
           won't interfere with our using the location here.  */
        output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
    }

  if (TARGET_PORTABLE_RUNTIME)
    {
      output_asm_insn ("ldil L'%0,%%r1", xoperands);
      output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
      output_asm_insn ("bv %%r0(%%r1)", xoperands);
    }
  else if (flag_pic)
    {
      output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
      if (TARGET_SOM || !TARGET_GAS)
        {
          xoperands[1] = gen_label_rtx ();
          output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
          targetm.asm_out.internal_label (asm_out_file, "L",
                                          CODE_LABEL_NUMBER (xoperands[1]));
          output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
        }
      else
        {
          output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
          output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
        }
      output_asm_insn ("bv %%r0(%%r1)", xoperands);
    }
  else
    /* Now output a very long branch to the original target.  */
    output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);

  /* Now restore the value of %r1 in the delay slot.  */
  if (TARGET_64BIT)
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
        return "ldd -16(%%r30),%%r1";
      else
        return "ldd -40(%%r30),%%r1";
    }
  else
    {
      if (actual_fsize == 0 && !df_regs_ever_live_p (2))
        return "ldw -20(%%r30),%%r1";
      else
        return "ldw -12(%%r30),%%r1";
    }
}
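
/* For reference, a sketch assembled from the cases above (not part of
   the original source): in the non-PIC, non-portable-runtime case with
   a frame, the full 16-byte sequence for a long branch to `target' is

	stw %r1,-12(%r30)        ; save %r1 in the "Clean Up" slot
	ldil L'target,%r1        ; left half of the target address
	be R'target(%sr4,%r1)    ; inter-space branch through %r1
	ldw -12(%r30),%r1        ; restore %r1 in the delay slot

   The portable runtime and PIC cases replace the two middle
   instructions with three- and four-instruction address computations,
   which yields the 20- and 24-byte lengths quoted above.  */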

/* This routine handles all the branch-on-bit conditional branch sequences we
   might need to generate.  It handles nullification of delay slots,
   varying length branches, negated branches and all combinations of the
   above.  It returns the appropriate output template to emit the branch.  */

const char *
output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
{
  static char buf[100];
  int useskip = 0;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  I do not think this can happen as this pattern
     is only used when optimizing; jump optimization should eliminate the
     jump.  But be prepared just in case.  */

  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with an
     extrs instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */

  if (length == 4
      && next_real_insn (insn) != 0
      && get_attr_length (next_real_insn (insn)) == 4
      && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
      && nullify)
    useskip = 1;

  switch (length)
    {

      /* All short conditional branches except backwards with an unfilled
         delay slot.  */
      case 4:
        if (useskip)
          strcpy (buf, "{extrs,|extrw,s,}");
        else
          strcpy (buf, "bb,");
        if (useskip && GET_MODE (operands[0]) == DImode)
          strcpy (buf, "extrd,s,*");
        else if (GET_MODE (operands[0]) == DImode)
          strcpy (buf, "bb,*");
        if ((which == 0 && negated)
             || (which == 1 && ! negated))
          strcat (buf, ">=");
        else
          strcat (buf, "<");
        if (useskip)
          strcat (buf, " %0,%1,1,%%r0");
        else if (nullify && negated)
          {
            if (branch_needs_nop_p (insn))
              strcat (buf, ",n %0,%1,%3%#");
            else
              strcat (buf, ",n %0,%1,%3");
          }
        else if (nullify && ! negated)
          {
            if (branch_needs_nop_p (insn))
              strcat (buf, ",n %0,%1,%2%#");
            else
              strcat (buf, ",n %0,%1,%2");
          }
        else if (! nullify && negated)
          strcat (buf, " %0,%1,%3");
        else if (! nullify && ! negated)
          strcat (buf, " %0,%1,%2");
        break;

     /* All long conditionals.  Note a short backward branch with an
        unfilled delay slot is treated just like a long backward branch
        with an unfilled delay slot.  */
      case 8:
        /* Handle weird backwards branch with a filled delay slot
           which is nullified.  */
        if (dbr_sequence_length () != 0
            && ! forward_branch_p (insn)
            && nullify)
          {
            strcpy (buf, "bb,");
            if (GET_MODE (operands[0]) == DImode)
              strcat (buf, "*");
            if ((which == 0 && negated)
                || (which == 1 && ! negated))
              strcat (buf, "<");
            else
              strcat (buf, ">=");
            if (negated)
              strcat (buf, ",n %0,%1,.+12\n\tb %3");
            else
              strcat (buf, ",n %0,%1,.+12\n\tb %2");
          }
        /* Handle short backwards branch with an unfilled delay slot.
           Using a bb;nop rather than extrs;bl saves 1 cycle for both
           taken and untaken branches.  */
        else if (dbr_sequence_length () == 0
                 && ! forward_branch_p (insn)
                 && INSN_ADDRESSES_SET_P ()
                 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
                                    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
          {
            strcpy (buf, "bb,");
            if (GET_MODE (operands[0]) == DImode)
              strcat (buf, "*");
            if ((which == 0 && negated)
                || (which == 1 && ! negated))
              strcat (buf, ">=");
            else
              strcat (buf, "<");
            if (negated)
              strcat (buf, " %0,%1,%3%#");
            else
              strcat (buf, " %0,%1,%2%#");
          }
        else
          {
            if (GET_MODE (operands[0]) == DImode)
              strcpy (buf, "extrd,s,*");
            else
              strcpy (buf, "{extrs,|extrw,s,}");
            if ((which == 0 && negated)
                || (which == 1 && ! negated))
              strcat (buf, "<");
            else
              strcat (buf, ">=");
            if (nullify && negated)
              strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
            else if (nullify && ! negated)
              strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
            else if (negated)
              strcat (buf, " %0,%1,1,%%r0\n\tb %3");
            else
              strcat (buf, " %0,%1,1,%%r0\n\tb %2");
          }
        break;

      default:
        /* The reversed conditional branch must branch over one additional
           instruction if the delay slot is filled and needs to be extracted
           by output_lbranch.  If the delay slot is empty or this is a
           nullified forward branch, the instruction after the reversed
           condition branch must be nullified.  */
        if (dbr_sequence_length () == 0
            || (nullify && forward_branch_p (insn)))
          {
            nullify = 1;
            xdelay = 0;
            operands[4] = GEN_INT (length);
          }
        else
          {
            xdelay = 1;
            operands[4] = GEN_INT (length + 4);
          }

        if (GET_MODE (operands[0]) == DImode)
          strcpy (buf, "bb,*");
        else
          strcpy (buf, "bb,");
        if ((which == 0 && negated)
            || (which == 1 && !negated))
          strcat (buf, "<");
        else
          strcat (buf, ">=");
        if (nullify)
          strcat (buf, ",n %0,%1,.+%4");
        else
          strcat (buf, " %0,%1,.+%4");
        output_asm_insn (buf, operands);
        return output_lbranch (negated ? operands[3] : operands[2],
                               insn, xdelay);
    }
  return buf;
}

/* This routine handles all the branch-on-variable-bit conditional branch
   sequences we might need to generate.  It handles nullification of delay
   slots, varying length branches, negated branches and all combinations
   of the above.  It returns the appropriate output template to emit the
   branch.  */

const char *
output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
{
  static char buf[100];
  int useskip = 0;
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
  int length = get_attr_length (insn);
  int xdelay;

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  I do not think this can happen as this pattern
     is only used when optimizing; jump optimization should eliminate the
     jump.  But be prepared just in case.  */

  if (branch_to_delay_slot_p (insn))
    return "nop";

  /* If this is a long branch with its delay slot unfilled, set `nullify'
     as it can nullify the delay slot and save a nop.  */
  if (length == 8 && dbr_sequence_length () == 0)
    nullify = 1;

  /* If this is a short forward conditional branch which did not get
     its delay slot filled, the delay slot can still be nullified.  */
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
    nullify = forward_branch_p (insn);

  /* A forward branch over a single nullified insn can be done with an
     extrs instruction.  This avoids a single cycle penalty due to
     mis-predicted branch if we fall through (branch not taken).  */

  if (length == 4
      && next_real_insn (insn) != 0
      && get_attr_length (next_real_insn (insn)) == 4
      && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
      && nullify)
    useskip = 1;

  switch (length)
    {

      /* All short conditional branches except backwards with an unfilled
         delay slot.  */
      case 4:
        if (useskip)
          strcpy (buf, "{vextrs,|extrw,s,}");
        else
          strcpy (buf, "{bvb,|bb,}");
        if (useskip && GET_MODE (operands[0]) == DImode)
          strcpy (buf, "extrd,s,*");
        else if (GET_MODE (operands[0]) == DImode)
          strcpy (buf, "bb,*");
        if ((which == 0 && negated)
             || (which == 1 && ! negated))
          strcat (buf, ">=");
        else
          strcat (buf, "<");
        if (useskip)
          strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
        else if (nullify && negated)
          {
            if (branch_needs_nop_p (insn))
              strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
            else
              strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
          }
        else if (nullify && ! negated)
          {
            if (branch_needs_nop_p (insn))
              strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
            else
              strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
          }
        else if (! nullify && negated)
          strcat (buf, "{ %0,%3| %0,%%sar,%3}");
        else if (! nullify && ! negated)
          strcat (buf, "{ %0,%2| %0,%%sar,%2}");
        break;

     /* All long conditionals.  Note a short backward branch with an
        unfilled delay slot is treated just like a long backward branch
        with an unfilled delay slot.  */
      case 8:
        /* Handle weird backwards branch with a filled delay slot
           which is nullified.  */
        if (dbr_sequence_length () != 0
            && ! forward_branch_p (insn)
            && nullify)
          {
            strcpy (buf, "{bvb,|bb,}");
            if (GET_MODE (operands[0]) == DImode)
              strcat (buf, "*");
            if ((which == 0 && negated)
                || (which == 1 && ! negated))
              strcat (buf, "<");
            else
              strcat (buf, ">=");
            if (negated)
              strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
            else
              strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
          }
        /* Handle short backwards branch with an unfilled delay slot.
           Using a bb;nop rather than extrs;bl saves 1 cycle for both
           taken and untaken branches.  */
        else if (dbr_sequence_length () == 0
                 && ! forward_branch_p (insn)
                 && INSN_ADDRESSES_SET_P ()
                 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
                                    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
          {
            strcpy (buf, "{bvb,|bb,}");
            if (GET_MODE (operands[0]) == DImode)
              strcat (buf, "*");
            if ((which == 0 && negated)
                || (which == 1 && ! negated))
              strcat (buf, ">=");
            else
              strcat (buf, "<");
            if (negated)
              strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
            else
              strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
          }
        else
          {
            strcpy (buf, "{vextrs,|extrw,s,}");
            if (GET_MODE (operands[0]) == DImode)
              strcpy (buf, "extrd,s,*");
            if ((which == 0 && negated)
                || (which == 1 && ! negated))
              strcat (buf, "<");
            else
              strcat (buf, ">=");
            if (nullify && negated)
              strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
            else if (nullify && ! negated)
              strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
            else if (negated)
              strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
            else
              strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
          }
        break;

      default:
        /* The reversed conditional branch must branch over one additional
           instruction if the delay slot is filled and needs to be extracted
           by output_lbranch.  If the delay slot is empty or this is a
           nullified forward branch, the instruction after the reversed
           condition branch must be nullified.  */
        if (dbr_sequence_length () == 0
            || (nullify && forward_branch_p (insn)))
          {
            nullify = 1;
            xdelay = 0;
            operands[4] = GEN_INT (length);
          }
        else
          {
            xdelay = 1;
            operands[4] = GEN_INT (length + 4);
          }

        if (GET_MODE (operands[0]) == DImode)
          strcpy (buf, "bb,*");
        else
          strcpy (buf, "{bvb,|bb,}");
        if ((which == 0 && negated)
            || (which == 1 && !negated))
          strcat (buf, "<");
        else
          strcat (buf, ">=");
        if (nullify)
          strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
        else
          strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
        output_asm_insn (buf, operands);
        return output_lbranch (negated ? operands[3] : operands[2],
                               insn, xdelay);
    }
  return buf;
}

/* Return the output template for emitting a dbra type insn.

   Note it may perform some output operations on its own before
   returning the final output string.  */
const char *
output_dbra (rtx *operands, rtx insn, int which_alternative)
{
  int length = get_attr_length (insn);

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  Be prepared!  */

  if (branch_to_delay_slot_p (insn))
    {
      if (which_alternative == 0)
        return "ldo %1(%0),%0";
      else if (which_alternative == 1)
        {
          output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
          output_asm_insn ("ldw -16(%%r30),%4", operands);
          output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
          return "{fldws|fldw} -16(%%r30),%0";
        }
      else
        {
          output_asm_insn ("ldw %0,%4", operands);
          return "ldo %1(%4),%4\n\tstw %4,%0";
        }
    }

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int xdelay;

      /* If this is a long branch with its delay slot unfilled, set `nullify'
         as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
        nullify = 1;

      /* If this is a short forward conditional branch which did not get
         its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
        nullify = forward_branch_p (insn);

      switch (length)
        {
        case 4:
          if (nullify)
            {
              if (branch_needs_nop_p (insn))
                return "addib,%C2,n %1,%0,%3%#";
              else
                return "addib,%C2,n %1,%0,%3";
            }
          else
            return "addib,%C2 %1,%0,%3";

        case 8:
          /* Handle weird backwards branch with a filled delay slot
             which is nullified.  */
          if (dbr_sequence_length () != 0
              && ! forward_branch_p (insn)
              && nullify)
            return "addib,%N2,n %1,%0,.+12\n\tb %3";
          /* Handle short backwards branch with an unfilled delay slot.
             Using an addb;nop rather than addi;bl saves 1 cycle for both
             taken and untaken branches.  */
          else if (dbr_sequence_length () == 0
                   && ! forward_branch_p (insn)
                   && INSN_ADDRESSES_SET_P ()
                   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
                                      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
              return "addib,%C2 %1,%0,%3%#";

          /* Handle normal cases.  */
          if (nullify)
            return "addi,%N2 %1,%0,%0\n\tb,n %3";
          else
            return "addi,%N2 %1,%0,%0\n\tb %3";

        default:
          /* The reversed conditional branch must branch over one additional
             instruction if the delay slot is filled and needs to be extracted
             by output_lbranch.  If the delay slot is empty or this is a
             nullified forward branch, the instruction after the reversed
             condition branch must be nullified.  */
          if (dbr_sequence_length () == 0
              || (nullify && forward_branch_p (insn)))
            {
              nullify = 1;
              xdelay = 0;
              operands[4] = GEN_INT (length);
            }
          else
            {
              xdelay = 1;
              operands[4] = GEN_INT (length + 4);
            }

          if (nullify)
            output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
          else
            output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);

          return output_lbranch (operands[3], insn, xdelay);
        }

    }
  /* Deal with gross reload from FP register case.  */
  else if (which_alternative == 1)
    {
      /* Move loop counter from FP register to MEM then into a GR,
         increment the GR, store the GR into MEM, and finally reload
         the FP register from MEM from within the branch's delay slot.  */
      output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
                       operands);
      output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
      if (length == 24)
        return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
      else if (length == 28)
        return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
      else
        {
          operands[5] = GEN_INT (length - 16);
          output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
          output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
          return output_lbranch (operands[3], insn, 0);
        }
    }
  /* Deal with gross reload from memory case.  */
  else
    {
      /* Reload loop counter from memory, the store back to memory
         happens in the branch's delay slot.  */
      output_asm_insn ("ldw %0,%4", operands);
      if (length == 12)
        return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
      else if (length == 16)
        return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
      else
        {
          operands[5] = GEN_INT (length - 4);
          output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
          return output_lbranch (operands[3], insn, 0);
        }
    }
}

/* Return the output template for emitting a movb type insn.

   Note it may perform some output operations on its own before
   returning the final output string.  */
const char *
output_movb (rtx *operands, rtx insn, int which_alternative,
             int reverse_comparison)
{
  int length = get_attr_length (insn);

  /* A conditional branch to the following instruction (e.g. the delay slot) is
     asking for a disaster.  Be prepared!  */

  if (branch_to_delay_slot_p (insn))
    {
      if (which_alternative == 0)
        return "copy %1,%0";
      else if (which_alternative == 1)
        {
          output_asm_insn ("stw %1,-16(%%r30)", operands);
          return "{fldws|fldw} -16(%%r30),%0";
        }
      else if (which_alternative == 2)
        return "stw %1,%0";
      else
        return "mtsar %r1";
    }

  /* Support the second variant.  */
  if (reverse_comparison)
    PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));

  if (which_alternative == 0)
    {
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
      int xdelay;

      /* If this is a long branch with its delay slot unfilled, set `nullify'
         as it can nullify the delay slot and save a nop.  */
      if (length == 8 && dbr_sequence_length () == 0)
        nullify = 1;

      /* If this is a short forward conditional branch which did not get
         its delay slot filled, the delay slot can still be nullified.  */
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
        nullify = forward_branch_p (insn);

      switch (length)
        {
        case 4:
          if (nullify)
            {
              if (branch_needs_nop_p (insn))
                return "movb,%C2,n %1,%0,%3%#";
              else
                return "movb,%C2,n %1,%0,%3";
            }
          else
            return "movb,%C2 %1,%0,%3";

        case 8:
          /* Handle weird backwards branch with a filled delay slot
             which is nullified.  */
          if (dbr_sequence_length () != 0
              && ! forward_branch_p (insn)
              && nullify)
            return "movb,%N2,n %1,%0,.+12\n\tb %3";

          /* Handle short backwards branch with an unfilled delay slot.
             Using a movb;nop rather than or;bl saves 1 cycle for both
             taken and untaken branches.  */
          else if (dbr_sequence_length () == 0
                   && ! forward_branch_p (insn)
                   && INSN_ADDRESSES_SET_P ()
                   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
                                      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
            return "movb,%C2 %1,%0,%3%#";
          /* Handle normal cases.  */
          if (nullify)
            return "or,%N2 %1,%%r0,%0\n\tb,n %3";
          else
            return "or,%N2 %1,%%r0,%0\n\tb %3";

        default:
          /* The reversed conditional branch must branch over one additional
             instruction if the delay slot is filled and needs to be extracted
             by output_lbranch.  If the delay slot is empty or this is a
             nullified forward branch, the instruction after the reversed
             condition branch must be nullified.  */
          if (dbr_sequence_length () == 0
              || (nullify && forward_branch_p (insn)))
            {
              nullify = 1;
              xdelay = 0;
              operands[4] = GEN_INT (length);
            }
          else
            {
              xdelay = 1;
              operands[4] = GEN_INT (length + 4);
            }

          if (nullify)
            output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
          else
            output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);

          return output_lbranch (operands[3], insn, xdelay);
        }
    }
  /* Deal with gross reload for FP destination register case.  */
  else if (which_alternative == 1)
    {
      /* Move source register to MEM, perform the branch test, then
         finally load the FP register from MEM from within the branch's
         delay slot.  */
      output_asm_insn ("stw %1,-16(%%r30)", operands);
      if (length == 12)
        return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
      else if (length == 16)
        return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
      else
        {
          operands[4] = GEN_INT (length - 4);
          output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
          output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
          return output_lbranch (operands[3], insn, 0);
        }
    }
  /* Deal with gross reload from memory case.  */
  else if (which_alternative == 2)
    {
      /* Reload loop counter from memory, the store back to memory
         happens in the branch's delay slot.  */
      if (length == 8)
        return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
      else if (length == 12)
        return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
      else
        {
          operands[4] = GEN_INT (length);
          output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
                           operands);
          return output_lbranch (operands[3], insn, 0);
        }
    }
  /* Handle SAR as a destination.  */
  else
    {
      if (length == 8)
        return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
      else if (length == 12)
        return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
      else
        {
          operands[4] = GEN_INT (length);
          output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
                           operands);
          return output_lbranch (operands[3], insn, 0);
        }
    }
}

/* Copy any FP arguments in INSN into integer registers.  */
static void
copy_fp_args (rtx insn)
{
  rtx link;
  rtx xoperands[2];

  for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
    {
      int arg_mode, regno;
      rtx use = XEXP (link, 0);

      if (! (GET_CODE (use) == USE
          && GET_CODE (XEXP (use, 0)) == REG
          && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
        continue;

      arg_mode = GET_MODE (XEXP (use, 0));
      regno = REGNO (XEXP (use, 0));

      /* Is it a floating point register?  */
      if (regno >= 32 && regno <= 39)
        {
          /* Copy the FP register into an integer register via memory.  */
          if (arg_mode == SFmode)
            {
              xoperands[0] = XEXP (use, 0);
              xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
              output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
              output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
            }
          else
            {
              xoperands[0] = XEXP (use, 0);
              xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
              output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
              output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
              output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
            }
        }
    }
}
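
/* Illustration (a minimal sketch, not compiled): the register
   arithmetic above maps FP argument registers to the general registers
   receiving the copies; e.g. an SFmode argument in FP register 32 or
   33 lands in %r26, in 34 or 35 lands in %r25, and so on.  */
#if 0
static int
fp_arg_gr_sketch (int regno, int is_dfmode)
{
  return is_dfmode ? 25 - (regno - 34) / 2    /* DFmode pair base */
                   : 26 - (regno - 32) / 2;   /* SFmode copy */
}
#endif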

/* Compute length of the FP argument copy sequence for INSN.  */
static int
length_fp_args (rtx insn)
{
  int length = 0;
  rtx link;

  for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
    {
      int arg_mode, regno;
      rtx use = XEXP (link, 0);

      if (! (GET_CODE (use) == USE
          && GET_CODE (XEXP (use, 0)) == REG
          && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
        continue;

      arg_mode = GET_MODE (XEXP (use, 0));
      regno = REGNO (XEXP (use, 0));

      /* Is it a floating point register?  */
      if (regno >= 32 && regno <= 39)
        {
          if (arg_mode == SFmode)
            length += 8;
          else
            length += 12;
        }
    }

  return length;
}

/* Return the attribute length for the millicode call instruction INSN.
   The length must match the code generated by output_millicode_call.
   We include the delay slot in the returned length as it is better to
   overestimate the length than to underestimate it.  */

int
attr_length_millicode_call (rtx insn)
{
  unsigned long distance = -1;
  unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;

  if (INSN_ADDRESSES_SET_P ())
    {
      distance = (total + insn_current_reference_address (insn));
      if (distance < total)
        distance = -1;
    }

  if (TARGET_64BIT)
    {
      if (!TARGET_LONG_CALLS && distance < 7600000)
        return 8;

      return 20;
    }
  else if (TARGET_PORTABLE_RUNTIME)
    return 24;
  else
    {
      if (!TARGET_LONG_CALLS && distance < 240000)
        return 8;

      if (TARGET_LONG_ABS_CALL && !flag_pic)
        return 12;

      return 24;
    }
}
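
/* Illustration (a minimal sketch, not compiled): the decision above as
   a standalone function over plain flags, with DISTANCE the worst-case
   reach computed from total_code_bytes as in the code above.  */
#if 0
static int
millicode_call_length_sketch (unsigned long distance, int is_64bit,
                              int portable, int long_calls,
                              int long_abs_call, int pic)
{
  if (is_64bit)
    return (!long_calls && distance < 7600000) ? 8 : 20;
  if (portable)
    return 24;
  if (!long_calls && distance < 240000)
    return 8;                       /* short bl reaches the target */
  if (long_abs_call && !pic)
    return 12;                      /* absolute ldil/ble sequence */
  return 24;                        /* worst-case long sequence */
}
#endif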
7295
 
7296
/* INSN is a function call.  It may have an unconditional jump
7297
   in its delay slot.
7298
 
7299
   CALL_DEST is the routine we are calling.  */
7300
 
7301
const char *
7302
output_millicode_call (rtx insn, rtx call_dest)
7303
{
7304
  int attr_length = get_attr_length (insn);
7305
  int seq_length = dbr_sequence_length ();
7306
  int distance;
7307
  rtx seq_insn;
7308
  rtx xoperands[3];
7309
 
7310
  xoperands[0] = call_dest;
7311
  xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7312
 
7313
  /* Handle the common case where we are sure that the branch will
7314
     reach the beginning of the $CODE$ subspace.  The within reach
7315
     form of the $$sh_func_adrs call has a length of 28.  Because
7316
     it has an attribute type of multi, it never has a nonzero
7317
     sequence length.  The length of the $$sh_func_adrs is the same
7318
     as certain out of reach PIC calls to other routines.  */
7319
  if (!TARGET_LONG_CALLS
7320
      && ((seq_length == 0
7321
           && (attr_length == 12
7322
               || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
7323
          || (seq_length != 0 && attr_length == 8)))
7324
    {
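      /* Output templates written as {alt0|alt1} select the PA 1.x or
         PA 2.0 spelling of a mnemonic via ASSEMBLER_DIALECT, so this
         emits either "bl" or "b,l".  */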
      output_asm_insn ("{bl|b,l} %0,%2", xoperands);
    }
  else
    {
      if (TARGET_64BIT)
        {
          /* It might seem that one insn could be saved by accessing
             the millicode function using the linkage table.  However,
             this doesn't work in shared libraries and other dynamically
             loaded objects.  Using a pc-relative sequence also avoids
             problems related to the implicit use of the gp register.  */
          output_asm_insn ("b,l .+8,%%r1", xoperands);

          if (TARGET_GAS)
            {
              output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
              output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
            }
          else
            {
              xoperands[1] = gen_label_rtx ();
              output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
              targetm.asm_out.internal_label (asm_out_file, "L",
                                         CODE_LABEL_NUMBER (xoperands[1]));
              output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
            }

          output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
        }
      else if (TARGET_PORTABLE_RUNTIME)
        {
          /* Pure portable runtime doesn't allow be/ble; we also don't
             have PIC support in the assembler/linker, so this sequence
             is needed.  */

          /* Get the address of our target into %r1.  */
          output_asm_insn ("ldil L'%0,%%r1", xoperands);
          output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);

          /* Get our return address into %r31.  */
          output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
          output_asm_insn ("addi 8,%%r31,%%r31", xoperands);

          /* Jump to our target address in %r1.  */
          output_asm_insn ("bv %%r0(%%r1)", xoperands);
        }
      else if (!flag_pic)
        {
          output_asm_insn ("ldil L'%0,%%r1", xoperands);
          if (TARGET_PA_20)
            output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
          else
            output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
        }
      else
        {
          output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
          output_asm_insn ("addi 16,%%r1,%%r31", xoperands);

          if (TARGET_SOM || !TARGET_GAS)
            {
              /* The HP assembler can generate relocations for the
                 difference of two symbols.  GAS can do this for a
                 millicode symbol but not an arbitrary external
                 symbol when generating SOM output.  */
              xoperands[1] = gen_label_rtx ();
              targetm.asm_out.internal_label (asm_out_file, "L",
                                         CODE_LABEL_NUMBER (xoperands[1]));
              output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
              output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
            }
          else
            {
              output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
              output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
                               xoperands);
            }

          /* Jump to our target address in %r1.  */
          output_asm_insn ("bv %%r0(%%r1)", xoperands);
        }
    }

  if (seq_length == 0)
    output_asm_insn ("nop", xoperands);

  /* We are done if there isn't a jump in the delay slot.  */
  if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
    return "";

  /* This call has an unconditional jump in its delay slot.  */
  xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);

  /* See if the return address can be adjusted.  Use the containing
     sequence insn's address.  */
  if (INSN_ADDRESSES_SET_P ())
    {
      seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
      distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
                  - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);

      if (VAL_14_BITS_P (distance))
        {
          xoperands[1] = gen_label_rtx ();
          output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
          targetm.asm_out.internal_label (asm_out_file, "L",
                                          CODE_LABEL_NUMBER (xoperands[1]));
        }
      else
        /* ??? This branch may not reach its target.  */
        output_asm_insn ("nop\n\tb,n %0", xoperands);
    }
  else
    /* ??? This branch may not reach its target.  */
    output_asm_insn ("nop\n\tb,n %0", xoperands);

  /* Delete the jump.  */
  SET_INSN_DELETED (NEXT_INSN (insn));

  return "";
}

/* Return the attribute length of the call instruction INSN.  The SIBCALL
   flag indicates whether INSN is a regular call or a sibling call.  The
   length returned must be at least as long as the code actually generated
   by output_call.  Since branch shortening is done before delay branch
   sequencing, there is no way to determine whether or not the delay
   slot will be filled during branch shortening.  Even when the delay
   slot is filled, we may have to add a nop if the delay slot contains
   a branch that can't reach its target.  Thus, we always have to include
   the delay slot in the length estimate.  This used to be done in
   pa_adjust_insn_length but we do it here now as some sequences always
   fill the delay slot and we can save four bytes in the estimate for
   these sequences.  */

int
attr_length_call (rtx insn, int sibcall)
{
  int local_call;
  rtx call, call_dest;
  tree call_decl;
  int length = 0;
  rtx pat = PATTERN (insn);
  unsigned long distance = -1;

  gcc_assert (GET_CODE (insn) == CALL_INSN);

  if (INSN_ADDRESSES_SET_P ())
    {
      unsigned long total;

      total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
      distance = (total + insn_current_reference_address (insn));
      if (distance < total)
        distance = -1;
    }

  gcc_assert (GET_CODE (pat) == PARALLEL);

  /* Get the call rtx.  */
  call = XVECEXP (pat, 0, 0);
  if (GET_CODE (call) == SET)
    call = SET_SRC (call);

  gcc_assert (GET_CODE (call) == CALL);

  /* Determine if this is a local call.  */
  call_dest = XEXP (XEXP (call, 0), 0);
  call_decl = SYMBOL_REF_DECL (call_dest);
  local_call = call_decl && targetm.binds_local_p (call_decl);
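  /* binds_local_p is true when the target is known to resolve within
     the current module; such calls can use the cheaper long-call
     sequences below.  */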
  /* pc-relative branch.  */
  if (!TARGET_LONG_CALLS
      && ((TARGET_PA_20 && !sibcall && distance < 7600000)
          || distance < 240000))
    length += 8;

  /* 64-bit plabel sequence.  */
  else if (TARGET_64BIT && !local_call)
    length += sibcall ? 28 : 24;

  /* non-pic long absolute branch sequence.  */
  else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
    length += 12;

  /* long pc-relative branch sequence.  */
  else if (TARGET_LONG_PIC_SDIFF_CALL
           || (TARGET_GAS && !TARGET_SOM
               && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
    {
      length += 20;

      if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
        length += 8;
    }

  /* 32-bit plabel sequence.  */
  else
    {
      length += 32;

      if (TARGET_SOM)
        length += length_fp_args (insn);

      if (flag_pic)
        length += 4;

      if (!TARGET_PA_20)
        {
          if (!sibcall)
            length += 8;

          if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
            length += 8;
        }
    }

  return length;
}

/* INSN is a function call.  It may have an unconditional jump
   in its delay slot.

   CALL_DEST is the routine we are calling.  */

const char *
output_call (rtx insn, rtx call_dest, int sibcall)
{
  int delay_insn_deleted = 0;
  int delay_slot_filled = 0;
  int seq_length = dbr_sequence_length ();
  tree call_decl = SYMBOL_REF_DECL (call_dest);
  int local_call = call_decl && targetm.binds_local_p (call_decl);
  rtx xoperands[2];

  xoperands[0] = call_dest;

  /* Handle the common case where we're sure that the branch will reach
     the beginning of the "$CODE$" subspace.  This is the beginning of
     the current function if we are in a named section.  */
  if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
    {
      xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
      output_asm_insn ("{bl|b,l} %0,%1", xoperands);
    }
  else
    {
      if (TARGET_64BIT && !local_call)
        {
          /* ??? As far as I can tell, the HP linker doesn't support the
             long pc-relative sequence described in the 64-bit runtime
             architecture.  So, we use a slightly longer indirect call.  */
          xoperands[0] = get_deferred_plabel (call_dest);
          xoperands[1] = gen_label_rtx ();

          /* If this isn't a sibcall, we put the load of %r27 into the
             delay slot.  We can't do this in a sibcall as we don't
             have a second call-clobbered scratch register available.  */
          if (seq_length != 0
              && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
              && !sibcall)
            {
              final_scan_insn (NEXT_INSN (insn), asm_out_file,
                               optimize, 0, NULL);

              /* Now delete the delay insn.  */
              SET_INSN_DELETED (NEXT_INSN (insn));
              delay_insn_deleted = 1;
            }

          output_asm_insn ("addil LT'%0,%%r27", xoperands);
          output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
          output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);

          if (sibcall)
            {
              output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
              output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
              output_asm_insn ("bve (%%r1)", xoperands);
            }
          else
            {
              output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
              output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
              output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
              delay_slot_filled = 1;
            }
        }
      else
        {
          int indirect_call = 0;

          /* Emit a long call.  There are several different sequences
             of increasing length and complexity.  In most cases,
             they don't allow an instruction in the delay slot.  */
          if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
              && !TARGET_LONG_PIC_SDIFF_CALL
              && !(TARGET_GAS && !TARGET_SOM
                   && (TARGET_LONG_PIC_PCREL_CALL || local_call))
              && !TARGET_64BIT)
            indirect_call = 1;

          if (seq_length != 0
              && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
              && !sibcall
              && (!TARGET_PA_20
                  || indirect_call
                  || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
            {
              /* A non-jump insn in the delay slot.  By definition we can
                 emit this insn before the call (and in fact before
                 argument relocation).  */
              final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
                               NULL);

              /* Now delete the delay insn.  */
              SET_INSN_DELETED (NEXT_INSN (insn));
              delay_insn_deleted = 1;
            }

          if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
            {
              /* This is the best sequence for making long calls in
                 non-pic code.  Unfortunately, GNU ld doesn't provide
                 the stub needed for external calls, and GAS's support
                 for this with the SOM linker is buggy.  It is safe
                 to use this for local calls.  */
              output_asm_insn ("ldil L'%0,%%r1", xoperands);
              if (sibcall)
                output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
              else
                {
                  if (TARGET_PA_20)
                    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
                                     xoperands);
                  else
                    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);

                  output_asm_insn ("copy %%r31,%%r2", xoperands);
                  delay_slot_filled = 1;
                }
            }
          else
            {
              if (TARGET_LONG_PIC_SDIFF_CALL)
                {
                  /* The HP assembler and linker can handle relocations
                     for the difference of two symbols.  The HP assembler
                     recognizes the sequence as a pc-relative call and
                     the linker provides stubs when needed.  */
                  xoperands[1] = gen_label_rtx ();
                  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
                  output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
                  targetm.asm_out.internal_label (asm_out_file, "L",
                                             CODE_LABEL_NUMBER (xoperands[1]));
                  output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
                }
              else if (TARGET_GAS && !TARGET_SOM
                       && (TARGET_LONG_PIC_PCREL_CALL || local_call))
                {
                  /*  GAS currently can't generate the relocations that
                      are needed for the SOM linker under HP-UX using this
                      sequence.  The GNU linker doesn't generate the stubs
                      that are needed for external calls on TARGET_ELF32
                      with this sequence.  For now, we have to use a
                      longer plabel sequence when using GAS.  */
                  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
                  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
                                   xoperands);
                  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
                                   xoperands);
                }
              else
                {
                  /* Emit a long plabel-based call sequence.  This is
                     essentially an inline implementation of $$dyncall.
                     We don't actually try to call $$dyncall as this is
                     as difficult as calling the function itself.  */
                  xoperands[0] = get_deferred_plabel (call_dest);
                  xoperands[1] = gen_label_rtx ();

                  /* Since the call is indirect, FP arguments in registers
                     need to be copied to the general registers.  Then, the
                     argument relocation stub will copy them back.  */
                  if (TARGET_SOM)
                    copy_fp_args (insn);

                  if (flag_pic)
                    {
                      output_asm_insn ("addil LT'%0,%%r19", xoperands);
                      output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
                      output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
                    }
                  else
                    {
                      output_asm_insn ("addil LR'%0-$global$,%%r27",
                                       xoperands);
                      output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
                                       xoperands);
                    }

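                  /* The word loaded above may be the function address
                     or a pointer to a plabel descriptor; bit 30 of the
                     value distinguishes the two.  If it is set, clear
                     the low-order flag bits and fetch the entry point
                     and linkage table pointer from the descriptor.  */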
                  output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
                  output_asm_insn ("depi 0,31,2,%%r1", xoperands);
                  output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
                  output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);

                  if (!sibcall && !TARGET_PA_20)
                    {
                      output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
                      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
                        output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
                      else
                        output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
                    }
                }

              if (TARGET_PA_20)
                {
                  if (sibcall)
                    output_asm_insn ("bve (%%r1)", xoperands);
                  else
                    {
                      if (indirect_call)
                        {
                          output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
                          output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
                          delay_slot_filled = 1;
                        }
                      else
                        output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
                    }
                }
              else
                {
                  if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
                    output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
                                     xoperands);

                  if (sibcall)
                    {
                      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
                        output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
                      else
                        output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
                    }
                  else
                    {
                      if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
                        output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
                      else
                        output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);

                      if (indirect_call)
                        output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
                      else
                        output_asm_insn ("copy %%r31,%%r2", xoperands);
                      delay_slot_filled = 1;
                    }
                }
            }
        }
    }

  if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
    output_asm_insn ("nop", xoperands);

  /* We are done if there isn't a jump in the delay slot.  */
  if (seq_length == 0
      || delay_insn_deleted
      || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
    return "";

  /* A sibcall should never have a branch in the delay slot.  */
  gcc_assert (!sibcall);

  /* This call has an unconditional jump in its delay slot.  */
  xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);

  if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
    {
      /* See if the return address can be adjusted.  Use the containing
         sequence insn's address.  This would break the regular call/return
         relationship assumed by the table-based EH unwinder, so only do that
         if the call is not possibly throwing.  */
      rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
      int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
                      - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);

      if (VAL_14_BITS_P (distance)
          && !(can_throw_internal (insn) || can_throw_external (insn)))
        {
          xoperands[1] = gen_label_rtx ();
          output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
          targetm.asm_out.internal_label (asm_out_file, "L",
                                          CODE_LABEL_NUMBER (xoperands[1]));
        }
      else
        output_asm_insn ("nop\n\tb,n %0", xoperands);
    }
  else
    output_asm_insn ("b,n %0", xoperands);

  /* Delete the jump.  */
  SET_INSN_DELETED (NEXT_INSN (insn));

  return "";
}

/* Return the attribute length of the indirect call instruction INSN.
   The length must match the code generated by output_indirect_call.
   The returned length includes the delay slot.  Currently, the delay
   slot of an indirect call sequence is not exposed and it is used by
   the sequence itself.  */

int
attr_length_indirect_call (rtx insn)
{
  unsigned long distance = -1;
  unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;

  if (INSN_ADDRESSES_SET_P ())
    {
      distance = (total + insn_current_reference_address (insn));
      if (distance < total)
        distance = -1;
    }

  if (TARGET_64BIT)
    return 12;

  if (TARGET_FAST_INDIRECT_CALLS
      || (!TARGET_PORTABLE_RUNTIME
          && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
              || distance < 240000)))
    return 8;

  if (flag_pic)
    return 24;

  if (TARGET_PORTABLE_RUNTIME)
    return 20;

  /* Out of reach, can use ble.  */
  return 12;
}

const char *
output_indirect_call (rtx insn, rtx call_dest)
{
  rtx xoperands[1];

  if (TARGET_64BIT)
    {
      xoperands[0] = call_dest;
      output_asm_insn ("ldd 16(%0),%%r2", xoperands);
      output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
      return "";
    }

  /* First the special case for kernels, level 0 systems, etc.  */
  if (TARGET_FAST_INDIRECT_CALLS)
    return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";

  /* Now the normal case -- we can reach $$dyncall directly or
     we're sure that we can get there via a long-branch stub.

     No need to check target flags as the length uniquely identifies
     the remaining cases.  */
  if (attr_length_indirect_call (insn) == 8)
    {
      /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
         $$dyncall.  Since BLE uses %r31 as the link register, the 22-bit
         variant of the B,L instruction can't be used on the SOM target.  */
      if (TARGET_PA_20 && !TARGET_SOM)
        return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
      else
        return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
    }

  /* Long millicode call, but we are not generating PIC or portable runtime
     code.  */
  if (attr_length_indirect_call (insn) == 12)
    return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";

  /* Long millicode call for portable runtime.  */
  if (attr_length_indirect_call (insn) == 20)
    return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";

  /* We need a long PIC call to $$dyncall.  */
  xoperands[0] = NULL_RTX;
  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
  if (TARGET_SOM || !TARGET_GAS)
    {
      xoperands[0] = gen_label_rtx ();
      output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
      targetm.asm_out.internal_label (asm_out_file, "L",
                                      CODE_LABEL_NUMBER (xoperands[0]));
      output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
    }
  else
    {
      output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
      output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
                       xoperands);
    }
  output_asm_insn ("blr %%r0,%%r2", xoperands);
  output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
  return "";
}

/* Return the total length of the save and restore instructions needed for
   the data linkage table pointer (i.e., the PIC register) across the call
   instruction INSN.  No-return calls do not require a save and restore.
   In addition, we may be able to avoid the save and restore for calls
   within the same translation unit.  */

int
attr_length_save_restore_dltp (rtx insn)
{
  if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
    return 0;

  return 8;
}

/* In HPUX 8.0's shared library scheme, special relocations are needed
   for function labels if they might be passed to a function
   in a shared library (because shared libraries don't live in code
   space), and special magic is needed to construct their address.  */

void
hppa_encode_label (rtx sym)
{
  const char *str = XSTR (sym, 0);
  int len = strlen (str) + 1;
  char *newstr, *p;

  p = newstr = XALLOCAVEC (char, len + 1);
  *p++ = '@';
  strcpy (p, str);
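  /* The result, e.g. "@foo" for "foo", is what FUNCTION_NAME_P tests
     for and what pa_strip_name_encoding undoes.  */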

  XSTR (sym, 0) = ggc_alloc_string (newstr, len);
}

static void
pa_encode_section_info (tree decl, rtx rtl, int first)
{
  int old_referenced = 0;

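  /* default_encode_section_info recomputes the symbol flags, so record
     whether the symbol was already marked as referenced and restore
     that bit afterwards.  */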
  if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
    old_referenced
      = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;

  default_encode_section_info (decl, rtl, first);

  if (first && TEXT_SPACE_P (decl))
    {
      SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
      if (TREE_CODE (decl) == FUNCTION_DECL)
        hppa_encode_label (XEXP (rtl, 0));
    }
  else if (old_referenced)
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
}

/* This is sort of inverse to pa_encode_section_info.  */

static const char *
pa_strip_name_encoding (const char *str)
{
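  /* Skip the '@' added by hppa_encode_label and any '*' marking the
     name as needing no further assembler-level decoration.  */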
  str += (*str == '@');
  str += (*str == '*');
  return str;
}

int
function_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
}

/* Returns 1 if OP is a function label involved in a simple addition
   with a constant.  Used to keep certain patterns from matching
   during instruction combination.  */
int
is_function_label_plus_const (rtx op)
{
  /* Strip off any CONST.  */
  if (GET_CODE (op) == CONST)
    op = XEXP (op, 0);

  return (GET_CODE (op) == PLUS
          && function_label_operand (XEXP (op, 0), Pmode)
          && GET_CODE (XEXP (op, 1)) == CONST_INT);
}

/* Output assembly code for a thunk to FUNCTION.  */

static void
pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
                        HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
                        tree function)
{
  static unsigned int current_thunk_number;
  int val_14 = VAL_14_BITS_P (delta);
  unsigned int old_last_address = last_address, nbytes = 0;
  char label[16];
  rtx xoperands[4];

  xoperands[0] = XEXP (DECL_RTL (function), 0);
  xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
  xoperands[2] = GEN_INT (delta);
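  /* The thunk adjusts the incoming 'this' pointer in %r26 by DELTA
     (the "ldo %2(%%r26),%%r26" emitted below) before transferring to
     FUNCTION.  */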

  ASM_OUTPUT_LABEL (file, XSTR (xoperands[1], 0));
  fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");

  /* Output the thunk.  We know that the function is in the same
     translation unit (i.e., the same space) as the thunk, and that
     thunks are output after their method.  Thus, we don't need an
     external branch to reach the function.  With SOM and GAS,
     functions and thunks are effectively in different sections.
     Thus, we can always use an IA-relative branch and the linker
     will add a long branch stub if necessary.

     However, we have to be careful when generating PIC code on the
     SOM port to ensure that the sequence does not transfer to an
     import stub for the target function as this could clobber the
     return value saved at SP-24.  This would also apply to the
     32-bit linux port if the multi-space model is implemented.  */
  if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
       && !(flag_pic && TREE_PUBLIC (function))
       && (TARGET_GAS || last_address < 262132))
      || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
          && ((targetm.have_named_sections
               && DECL_SECTION_NAME (thunk_fndecl) != NULL
               /* The GNU 64-bit linker has rather poor stub management.
                  So, we use a long branch from thunks that aren't in
                  the same section as the target function.  */
               && ((!TARGET_64BIT
                    && (DECL_SECTION_NAME (thunk_fndecl)
                        != DECL_SECTION_NAME (function)))
                   || ((DECL_SECTION_NAME (thunk_fndecl)
                        == DECL_SECTION_NAME (function))
                       && last_address < 262132)))
              || (targetm.have_named_sections
                  && DECL_SECTION_NAME (thunk_fndecl) == NULL
                  && DECL_SECTION_NAME (function) == NULL
                  && last_address < 262132)
              || (!targetm.have_named_sections && last_address < 262132))))
    {
      if (!val_14)
        output_asm_insn ("addil L'%2,%%r26", xoperands);

      output_asm_insn ("b %0", xoperands);

      if (val_14)
        {
          output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
          nbytes += 8;
        }
      else
        {
          output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
          nbytes += 12;
        }
    }
  else if (TARGET_64BIT)
    {
      /* We only have one call-clobbered scratch register, so we can't
         make use of the delay slot if delta doesn't fit in 14 bits.  */
      if (!val_14)
        {
          output_asm_insn ("addil L'%2,%%r26", xoperands);
          output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
        }

      output_asm_insn ("b,l .+8,%%r1", xoperands);

      if (TARGET_GAS)
        {
          output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
          output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
        }
      else
        {
          xoperands[3] = GEN_INT (val_14 ? 8 : 16);
          output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
        }

      if (val_14)
        {
          output_asm_insn ("bv %%r0(%%r1)", xoperands);
          output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
          nbytes += 20;
        }
      else
        {
          output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
          nbytes += 24;
        }
    }
  else if (TARGET_PORTABLE_RUNTIME)
    {
      output_asm_insn ("ldil L'%0,%%r1", xoperands);
      output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);

      if (!val_14)
        output_asm_insn ("addil L'%2,%%r26", xoperands);

      output_asm_insn ("bv %%r0(%%r22)", xoperands);

      if (val_14)
        {
          output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
          nbytes += 16;
        }
      else
        {
          output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
          nbytes += 20;
        }
    }
  else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
    {
      /* The function is accessible from outside this module.  The only
         way to avoid an import stub between the thunk and function is to
         call the function directly with an indirect sequence similar to
         that used by $$dyncall.  This is possible because $$dyncall acts
         as the import stub in an indirect call.  */
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
      xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
      output_asm_insn ("addil LT'%3,%%r19", xoperands);
      output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
      output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
      output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
      output_asm_insn ("depi 0,31,2,%%r22", xoperands);
      output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
      output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);

      if (!val_14)
        {
          output_asm_insn ("addil L'%2,%%r26", xoperands);
          nbytes += 4;
        }

      if (TARGET_PA_20)
        {
          output_asm_insn ("bve (%%r22)", xoperands);
          nbytes += 36;
        }
      else if (TARGET_NO_SPACE_REGS)
        {
          output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
          nbytes += 36;
        }
      else
        {
          output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
          output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
          output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
          nbytes += 44;
        }

      if (val_14)
        output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
      else
        output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
    }
  else if (flag_pic)
    {
      output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);

      if (TARGET_SOM || !TARGET_GAS)
        {
          output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
          output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
        }
      else
        {
          output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
          output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
        }

      if (!val_14)
        output_asm_insn ("addil L'%2,%%r26", xoperands);

      output_asm_insn ("bv %%r0(%%r22)", xoperands);

      if (val_14)
        {
          output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
          nbytes += 20;
        }
      else
        {
          output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
          nbytes += 24;
        }
    }
  else
    {
      if (!val_14)
        output_asm_insn ("addil L'%2,%%r26", xoperands);

      output_asm_insn ("ldil L'%0,%%r22", xoperands);
      output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);

      if (val_14)
        {
          output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
          nbytes += 12;
        }
      else
        {
          output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
          nbytes += 16;
        }
    }

  fprintf (file, "\t.EXIT\n\t.PROCEND\n");

  if (TARGET_SOM && TARGET_GAS)
    {
      /* We're done with this subspace except possibly for some additional
         debug information.  Forget that we are in this subspace to ensure
         that the next function is output in its own subspace.  */
      in_section = NULL;
      cfun->machine->in_nsubspa = 2;
    }

  if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
    {
      switch_to_section (data_section);
      output_asm_insn (".align 4", xoperands);
      ASM_OUTPUT_LABEL (file, label);
      output_asm_insn (".word P'%0", xoperands);
    }

  current_thunk_number++;
  nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
            & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
  last_address += nbytes;
  if (old_last_address > last_address)
    last_address = UINT_MAX;
  update_total_code_bytes (nbytes);
}

/* Only direct calls to static functions are allowed to be sibling (tail)
   call optimized.

   This restriction is necessary because some linker generated stubs will
   store return pointers into rp' in some cases which might clobber a
   live value already in rp'.

   In a sibcall the current function and the target function share stack
   space.  Thus if the path to the current function and the path to the
   target function save a value in rp', they save the value into the
   same stack slot, which has undesirable consequences.

   Because of the deferred binding nature of shared libraries any function
   with external scope could be in a different load module and thus require
   rp' to be saved when calling that function.  So sibcall optimizations
   can only be safe for static functions.

   Note that GCC never needs return value relocations, so we don't have to
   worry about static calls with return value relocations (which require
   saving rp').

   It is safe to perform a sibcall optimization when the target function
   will never return.  */
static bool
pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  if (TARGET_PORTABLE_RUNTIME)
    return false;

  /* Sibcalls are ok for TARGET_ELF32 as long as the linker is used in
     single subspace mode and the call is not indirect.  As far as I know,
     there is no operating system support for the multiple subspace mode.
     It might be possible to support indirect calls if we didn't use
     $$dyncall (see the indirect sequence generated in output_call).  */
  if (TARGET_ELF32)
    return (decl != NULL_TREE);

  /* Sibcalls are not ok because the arg pointer register is not a fixed
     register.  This prevents the sibcall optimization from occurring.  In
     addition, there are problems with stub placement using GNU ld.  This
     is because a normal sibcall branch uses a 17-bit relocation while
     a regular call branch uses a 22-bit relocation.  As a result, more
     care needs to be taken in the placement of long-branch stubs.  */
  if (TARGET_64BIT)
    return false;

  /* Sibcalls are only ok within a translation unit.  */
  return (decl && !TREE_PUBLIC (decl));
}

/* ??? Addition is not commutative on the PA due to the weird implicit
   space register selection rules for memory addresses.  Therefore, we
   don't consider a + b == b + a, as this might be inside a MEM.  */
static bool
pa_commutative_p (const_rtx x, int outer_code)
{
  return (COMMUTATIVE_P (x)
          && (TARGET_NO_SPACE_REGS
              || (outer_code != UNKNOWN && outer_code != MEM)
              || GET_CODE (x) != PLUS));
}

/* Returns 1 if the 6 operands specified in OPERANDS are suitable for
   use in fmpyadd instructions.  */
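/* fmpyadd performs an independent multiply and an independent add in
   parallel rather than a fused multiply-add, so neither operation may
   consume the other's result; the checks below enforce that.  */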
int
fmpyaddoperands (rtx *operands)
{
  enum machine_mode mode = GET_MODE (operands[0]);

  /* Must be a floating point mode.  */
  if (mode != SFmode && mode != DFmode)
    return 0;

  /* All modes must be the same.  */
  if (! (mode == GET_MODE (operands[1])
         && mode == GET_MODE (operands[2])
         && mode == GET_MODE (operands[3])
         && mode == GET_MODE (operands[4])
         && mode == GET_MODE (operands[5])))
    return 0;

  /* All operands must be registers.  */
  if (! (GET_CODE (operands[1]) == REG
         && GET_CODE (operands[2]) == REG
         && GET_CODE (operands[3]) == REG
         && GET_CODE (operands[4]) == REG
         && GET_CODE (operands[5]) == REG))
    return 0;

  /* Only 2 real operands to the addition.  One of the input operands must
     be the same as the output operand.  */
  if (! rtx_equal_p (operands[3], operands[4])
      && ! rtx_equal_p (operands[3], operands[5]))
    return 0;

  /* Inout operand of add cannot conflict with any operands from multiply.  */
  if (rtx_equal_p (operands[3], operands[0])
     || rtx_equal_p (operands[3], operands[1])
     || rtx_equal_p (operands[3], operands[2]))
    return 0;

  /* multiply cannot feed into addition operands.  */
  if (rtx_equal_p (operands[4], operands[0])
      || rtx_equal_p (operands[5], operands[0]))
    return 0;

  /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
  if (mode == SFmode
      && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
    return 0;

  /* Passed.  Operands are suitable for fmpyadd.  */
  return 1;
}

#if !defined(USE_COLLECT2)
static void
pa_asm_out_constructor (rtx symbol, int priority)
{
  if (!function_label_operand (symbol, VOIDmode))
    hppa_encode_label (symbol);

#ifdef CTORS_SECTION_ASM_OP
  default_ctor_section_asm_out_constructor (symbol, priority);
#else
# ifdef TARGET_ASM_NAMED_SECTION
  default_named_section_asm_out_constructor (symbol, priority);
# else
  default_stabs_asm_out_constructor (symbol, priority);
# endif
#endif
}

static void
pa_asm_out_destructor (rtx symbol, int priority)
{
  if (!function_label_operand (symbol, VOIDmode))
    hppa_encode_label (symbol);

#ifdef DTORS_SECTION_ASM_OP
  default_dtor_section_asm_out_destructor (symbol, priority);
#else
# ifdef TARGET_ASM_NAMED_SECTION
  default_named_section_asm_out_destructor (symbol, priority);
# else
  default_stabs_asm_out_destructor (symbol, priority);
# endif
#endif
}
#endif

/* This function places uninitialized global data in the bss section.
   The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
   function on the SOM port to prevent uninitialized global data from
   being placed in the data section.  */

void
pa_asm_output_aligned_bss (FILE *stream,
                           const char *name,
                           unsigned HOST_WIDE_INT size,
                           unsigned int align)
{
  switch_to_section (bss_section);
  fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);

#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
  ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
#endif

#ifdef ASM_OUTPUT_SIZE_DIRECTIVE
  ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
#endif

  fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
  ASM_OUTPUT_LABEL (stream, name);
  fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
}

/* Both the HP and GNU assemblers under HP-UX provide a .comm directive
   that doesn't allow the alignment of global common storage to be directly
   specified.  The SOM linker aligns common storage based on the rounded
   value of the NUM_BYTES parameter in the .comm directive.  It's not
   possible to use the .align directive as it doesn't affect the alignment
   of the label associated with a .comm directive.  */

void
pa_asm_output_aligned_common (FILE *stream,
                              const char *name,
                              unsigned HOST_WIDE_INT size,
                              unsigned int align)
{
  unsigned int max_common_align;

  max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
  if (align > max_common_align)
    {
      warning (0, "alignment (%u) for %s exceeds maximum alignment "
               "for global common data.  Using %u",
               align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
      align = max_common_align;
    }

  switch_to_section (bss_section);

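  /* Emit the larger of SIZE and the requested alignment as the .comm
     size; the SOM linker's rounding of that value then yields at least
     the desired alignment (e.g. a 1-byte object that needs 8-byte
     alignment is emitted as ".comm 8").  */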
  assemble_name (stream, name);
  fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n",
           MAX (size, align / BITS_PER_UNIT));
}

/* We can't use .comm for local common storage as the SOM linker effectively
   treats the symbol as universal and uses the same storage for local symbols
   with the same name in different object files.  The .block directive
   reserves an uninitialized block of storage.  However, it's not common
   storage.  Fortunately, GCC never requests common storage with the same
   name in any given translation unit.  */

void
pa_asm_output_aligned_local (FILE *stream,
                             const char *name,
                             unsigned HOST_WIDE_INT size,
                             unsigned int align)
{
  switch_to_section (bss_section);
  fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);

#ifdef LOCAL_ASM_OP
  fprintf (stream, "%s", LOCAL_ASM_OP);
  assemble_name (stream, name);
  fprintf (stream, "\n");
#endif

  ASM_OUTPUT_LABEL (stream, name);
  fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
}

/* Returns 1 if the 6 operands specified in OPERANDS are suitable for
   use in fmpysub instructions.  */
int
fmpysuboperands (rtx *operands)
{
  enum machine_mode mode = GET_MODE (operands[0]);

  /* Must be a floating point mode.  */
  if (mode != SFmode && mode != DFmode)
    return 0;

  /* All modes must be the same.  */
  if (! (mode == GET_MODE (operands[1])
         && mode == GET_MODE (operands[2])
         && mode == GET_MODE (operands[3])
         && mode == GET_MODE (operands[4])
         && mode == GET_MODE (operands[5])))
    return 0;

  /* All operands must be registers.  */
  if (! (GET_CODE (operands[1]) == REG
         && GET_CODE (operands[2]) == REG
         && GET_CODE (operands[3]) == REG
         && GET_CODE (operands[4]) == REG
         && GET_CODE (operands[5]) == REG))
    return 0;

  /* Only 2 real operands to the subtraction.  Subtraction is not a commutative
     operation, so operands[4] must be the same as operands[3].  */
  if (! rtx_equal_p (operands[3], operands[4]))
    return 0;

  /* multiply cannot feed into subtraction.  */
  if (rtx_equal_p (operands[5], operands[0]))
    return 0;

  /* Inout operand of sub cannot conflict with any operands from multiply.  */
  if (rtx_equal_p (operands[3], operands[0])
     || rtx_equal_p (operands[3], operands[1])
     || rtx_equal_p (operands[3], operands[2]))
    return 0;

  /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
  if (mode == SFmode
      && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
    return 0;

  /* Passed.  Operands are suitable for fmpysub.  */
  return 1;
}

/* Return 1 if the given constant is 2, 4, or 8.  These are the valid
   constants for shadd instructions.  */
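/* They correspond to the sh1add, sh2add and sh3add instructions, which
   scale an index by 2, 4 or 8 before adding it to a base.  */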
int
shadd_constant_p (int val)
{
  if (val == 2 || val == 4 || val == 8)
    return 1;
  else
    return 0;
}

/* Return 1 if OP is valid as a base or index register in a
   REG+REG address.  */

int
borx_reg_operand (rtx op, enum machine_mode mode)
{
  if (GET_CODE (op) != REG)
    return 0;

  /* We must reject virtual registers as the only expressions that
     can be instantiated are REG and REG+CONST.  */
  if (op == virtual_incoming_args_rtx
      || op == virtual_stack_vars_rtx
      || op == virtual_stack_dynamic_rtx
      || op == virtual_outgoing_args_rtx
      || op == virtual_cfa_rtx)
    return 0;

  /* While it's always safe to index off the frame pointer, it's not
     profitable to do so when the frame pointer is being eliminated.  */
  if (!reload_completed
      && flag_omit_frame_pointer
      && !cfun->calls_alloca
      && op == frame_pointer_rtx)
    return 0;

  return register_operand (op, mode);
}

/* Return 1 if this operand is anything other than a hard register.  */

int
non_hard_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
}

/* Return TRUE if INSN branches forward.  */

static bool
forward_branch_p (rtx insn)
{
  rtx lab = JUMP_LABEL (insn);

  /* The INSN must have a jump label.  */
  gcc_assert (lab != NULL_RTX);

  if (INSN_ADDRESSES_SET_P ())
    return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));

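  /* Without recorded insn addresses, scan forward from INSN; the
     branch is a forward branch iff LAB appears later in the chain.  */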
  while (insn)
    {
      if (insn == lab)
        return true;
      else
        insn = NEXT_INSN (insn);
    }

  return false;
}

/* Return 1 if OP is an equality comparison, else return 0.  */
int
eq_neq_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
}

/* Return 1 if INSN is in the delay slot of a call instruction.  */
int
jump_in_call_delay (rtx insn)
{
  if (GET_CODE (insn) != JUMP_INSN)
    return 0;

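  /* A filled delay slot appears as a SEQUENCE whose element 1 is the
     delay insn, so walk back two insns and check whether INSN occupies
     the delay slot of such a sequence.  */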
8655
  if (PREV_INSN (insn)
8656
      && PREV_INSN (PREV_INSN (insn))
8657
      && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
8658
    {
8659
      rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
8660
 
8661
      return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
8662
              && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
8663
 
8664
    }
8665
  else
8666
    return 0;
8667
}
8668
 
8669
/* Output an unconditional move and branch insn.  */
8670
 
8671
const char *
8672
output_parallel_movb (rtx *operands, rtx insn)
8673
{
8674
  int length = get_attr_length (insn);
8675
 
8676
  /* These are the cases in which we win.  */
8677
  if (length == 4)
8678
    return "mov%I1b,tr %1,%0,%2";
8679
 
8680
  /* None of the following cases win, but they don't lose either.  */
8681
  if (length == 8)
8682
    {
8683
      if (dbr_sequence_length () == 0)
8684
        {
8685
          /* Nothing in the delay slot, fake it by putting the combined
8686
             insn (the copy or add) in the delay slot of a bl.  */
8687
          if (GET_CODE (operands[1]) == CONST_INT)
8688
            return "b %2\n\tldi %1,%0";
8689
          else
8690
            return "b %2\n\tcopy %1,%0";
8691
        }
8692
      else
8693
        {
8694
          /* Something in the delay slot, but we've got a long branch.  */
8695
          if (GET_CODE (operands[1]) == CONST_INT)
8696
            return "ldi %1,%0\n\tb %2";
8697
          else
8698
            return "copy %1,%0\n\tb %2";
8699
        }
8700
    }
8701
 
8702
  if (GET_CODE (operands[1]) == CONST_INT)
8703
    output_asm_insn ("ldi %1,%0", operands);
8704
  else
8705
    output_asm_insn ("copy %1,%0", operands);
8706
  return output_lbranch (operands[2], insn, 1);
8707
}
8708
 
8709
/* Output an unconditional add and branch insn.  */
8710
 
8711
const char *
8712
output_parallel_addb (rtx *operands, rtx insn)
8713
{
8714
  int length = get_attr_length (insn);
8715
 
8716
  /* To make life easy we want operand0 to be the shared input/output
8717
     operand and operand1 to be the readonly operand.  */
8718
  if (operands[0] == operands[1])
8719
    operands[1] = operands[2];
8720
 
8721
  /* These are the cases in which we win.  */
8722
  if (length == 4)
8723
    return "add%I1b,tr %1,%0,%3";
8724
 
8725
  /* None of the following cases win, but they don't lose either.  */
8726
  if (length == 8)
8727
    {
8728
      if (dbr_sequence_length () == 0)
8729
        /* Nothing in the delay slot, fake it by putting the combined
8730
           insn (the copy or add) in the delay slot of a bl.  */
8731
        return "b %3\n\tadd%I1 %1,%0,%0";
8732
      else
8733
        /* Something in the delay slot, but we've got a long branch.  */
8734
        return "add%I1 %1,%0,%0\n\tb %3";
8735
    }
8736
 
8737
  output_asm_insn ("add%I1 %1,%0,%0", operands);
8738
  return output_lbranch (operands[3], insn, 1);
8739
}

/* Return nonzero if INSN (a jump insn) immediately follows a call
   to a named function.  This is used to avoid filling the delay slot
   of the jump since it can usually be eliminated by modifying RP in
   the delay slot of the call.  */

int
following_call (rtx insn)
{
  if (! TARGET_JUMP_IN_DELAY)
    return 0;

  /* Find the previous real insn, skipping NOTEs.  */
  insn = PREV_INSN (insn);
  while (insn && GET_CODE (insn) == NOTE)
    insn = PREV_INSN (insn);

  /* Check for CALL_INSNs and millicode calls.  */
  if (insn
      && ((GET_CODE (insn) == CALL_INSN
           && get_attr_type (insn) != TYPE_DYNCALL)
          || (GET_CODE (insn) == INSN
              && GET_CODE (PATTERN (insn)) != SEQUENCE
              && GET_CODE (PATTERN (insn)) != USE
              && GET_CODE (PATTERN (insn)) != CLOBBER
              && get_attr_type (insn) == TYPE_MILLI)))
    return 1;

  return 0;
}

/* We use this hook to perform a PA specific optimization which is difficult
   to do in earlier passes.

   We want the delay slots of branches within jump tables to be filled.
   None of the compiler passes at the moment even has the notion that a
   PA jump table doesn't contain addresses, but instead contains actual
   instructions!

   Because we actually jump into the table, the addresses of each entry
   must stay constant in relation to the beginning of the table (which
   itself must stay constant relative to the instruction to jump into
   it).  I don't believe we can guarantee earlier passes of the compiler
   will adhere to those rules.

   So, late in the compilation process we find all the jump tables, and
   expand them into real code -- e.g. each entry in the jump table vector
   will get an appropriate label followed by a jump to the final target.

   Reorg and the final jump pass can then optimize these branches and
   fill their delay slots.  We end up with smaller, more efficient code.

   The jump instructions within the table are special; we must be able
   to identify them during assembly output (if the jumps don't get filled
   we need to emit a nop rather than nullifying the delay slot).  We
   identify jumps in switch tables by using insns with the attribute
   type TYPE_BTABLE_BRANCH.

   We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
   insns.  This serves two purposes.  First, it prevents jump.c from
   noticing that the last N entries in the table jump to the instruction
   immediately after the table and deleting the jumps.  Second, those
   insns mark where we should emit .begin_brtab and .end_brtab directives
   when using GAS (allows for better link time optimizations).  */

static void
pa_reorg (void)
{
  rtx insn;

  remove_useless_addtr_insns (1);

  if (pa_cpu < PROCESSOR_8000)
    pa_combine_instructions ();

  /* This is fairly cheap, so always run it if optimizing.  */
  if (optimize > 0 && !TARGET_BIG_SWITCH)
    {
      /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns.  */
      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
        {
          rtx pattern, tmp, location, label;
          unsigned int length, i;

          /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode.  */
          if (GET_CODE (insn) != JUMP_INSN
              || (GET_CODE (PATTERN (insn)) != ADDR_VEC
                  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
            continue;

          /* Emit marker for the beginning of the branch table.  */
          emit_insn_before (gen_begin_brtab (), insn);

          pattern = PATTERN (insn);
          location = PREV_INSN (insn);
          length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);

          for (i = 0; i < length; i++)
            {
              /* Emit a label before each jump to keep jump.c from
                 removing this code.  */
              tmp = gen_label_rtx ();
              LABEL_NUSES (tmp) = 1;
              emit_label_after (tmp, location);
              location = NEXT_INSN (location);

              if (GET_CODE (pattern) == ADDR_VEC)
                label = XEXP (XVECEXP (pattern, 0, i), 0);
              else
                label = XEXP (XVECEXP (pattern, 1, i), 0);

              tmp = gen_short_jump (label);

              /* Emit the jump itself.  */
              tmp = emit_jump_insn_after (tmp, location);
              JUMP_LABEL (tmp) = label;
              LABEL_NUSES (label)++;
              location = NEXT_INSN (location);

              /* Emit a BARRIER after the jump.  */
              emit_barrier_after (location);
              location = NEXT_INSN (location);
            }

          /* Emit marker for the end of the branch table.  */
          emit_insn_before (gen_end_brtab (), location);
          location = NEXT_INSN (location);
          emit_barrier_after (location);

          /* Delete the ADDR_VEC or ADDR_DIFF_VEC.  */
          delete_insn (insn);
        }
    }
  else
    {
      /* Still need brtab marker insns.  FIXME: the presence of these
         markers disables output of the branch table to readonly memory,
         and any alignment directives that might be needed.  Possibly,
         the begin_brtab insn should be output before the label for the
         table.  This doesn't matter at the moment since the tables are
         always output in the text section.  */
      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
        {
          /* Find an ADDR_VEC insn.  */
          if (GET_CODE (insn) != JUMP_INSN
              || (GET_CODE (PATTERN (insn)) != ADDR_VEC
                  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
            continue;

          /* Now generate markers for the beginning and end of the
             branch table.  */
          emit_insn_before (gen_begin_brtab (), insn);
          emit_insn_after (gen_end_brtab (), insn);
        }
    }
}
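
/* Conceptual sketch (hypothetical labels): a branch table

       (addr_vec [L$10 L$20 L$30])

   is exploded above into a sequence of real branch insns

       L$100:  b L$10    ; delay slot now available for filling
       L$101:  b L$20
       L$102:  b L$30

   bracketed by begin_brtab/end_brtab markers, after which the
   original ADDR_VEC is deleted.  */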

/* The PA has a number of odd instructions which can perform multiple
   tasks at once.  On first generation PA machines (PA1.0 and PA1.1)
   it may be profitable to combine two instructions into one instruction
   with two outputs.  It's not profitable on PA2.0 machines because the
   two outputs would take two slots in the reorder buffers.

   This routine finds instructions which can be combined and combines
   them.  We only support some of the potential combinations, and we
   only try common ways to find suitable instructions.

      * addb can add two registers or a register and a small integer
      and jump to a nearby (+-8k) location.  Normally the jump to the
      nearby location is conditional on the result of the add, but by
      using the "true" condition we can make the jump unconditional.
      Thus addb can perform two independent operations in one insn.

      * movb is similar to addb in that it can perform a reg->reg
      or small immediate->reg copy and jump to a nearby (+-8k) location.

      * fmpyadd and fmpysub can perform an FP multiply and either an
      FP add or FP sub if the operands of the multiply and add/sub are
      independent (there are other minor restrictions).  Note both
      the fmpy and fadd/fsub can in theory move to better spots according
      to data dependencies, but for now we require the fmpy stay at a
      fixed location.

      * Many of the memory operations can perform pre & post updates
      of index registers.  GCC's pre/post increment/decrement addressing
      is far too simple to take advantage of all the possibilities.  This
      pass may not be suitable since those insns may not be independent.

      * comclr can compare two ints or an int and a register, nullify
      the following instruction and zero some other register.  This
      is more difficult to use as it's harder to find an insn which
      will generate a comclr than finding something like an unconditional
      branch.  (conditional moves & long branches create comclr insns).

      * Most arithmetic operations can conditionally skip the next
      instruction.  They can be viewed as "perform this operation
      and conditionally jump to this nearby location" (where nearby
      is an insn away).  These are difficult to use due to the
      branch length restrictions.  */

static void
pa_combine_instructions (void)
{
  rtx anchor, new_rtx;

  /* This can get expensive since the basic algorithm is on the
     order of O(n^2) (or worse).  Only do it for -O2 or higher
     levels of optimization.  */
  if (optimize < 2)
    return;

  /* Walk down the list of insns looking for "anchor" insns which
     may be combined with "floating" insns.  As the name implies,
     "anchor" instructions don't move, while "floating" insns may
     move around.  */
  new_rtx = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
  new_rtx = make_insn_raw (new_rtx);

  for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
    {
      enum attr_pa_combine_type anchor_attr;
      enum attr_pa_combine_type floater_attr;

      /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
         Also ignore any special USE insns.  */
      if ((GET_CODE (anchor) != INSN
           && GET_CODE (anchor) != JUMP_INSN
           && GET_CODE (anchor) != CALL_INSN)
          || GET_CODE (PATTERN (anchor)) == USE
          || GET_CODE (PATTERN (anchor)) == CLOBBER
          || GET_CODE (PATTERN (anchor)) == ADDR_VEC
          || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
        continue;

      anchor_attr = get_attr_pa_combine_type (anchor);
      /* See if anchor is an insn suitable for combination.  */
      if (anchor_attr == PA_COMBINE_TYPE_FMPY
          || anchor_attr == PA_COMBINE_TYPE_FADDSUB
          || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
              && ! forward_branch_p (anchor)))
        {
          rtx floater;

          for (floater = PREV_INSN (anchor);
               floater;
               floater = PREV_INSN (floater))
            {
              if (GET_CODE (floater) == NOTE
                  || (GET_CODE (floater) == INSN
                      && (GET_CODE (PATTERN (floater)) == USE
                          || GET_CODE (PATTERN (floater)) == CLOBBER)))
                continue;

              /* Anything except a regular INSN will stop our search.  */
              if (GET_CODE (floater) != INSN
                  || GET_CODE (PATTERN (floater)) == ADDR_VEC
                  || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
                {
                  floater = NULL_RTX;
                  break;
                }

              /* See if FLOATER is suitable for combination with the
                 anchor.  */
              floater_attr = get_attr_pa_combine_type (floater);
              if ((anchor_attr == PA_COMBINE_TYPE_FMPY
                   && floater_attr == PA_COMBINE_TYPE_FADDSUB)
                  || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
                      && floater_attr == PA_COMBINE_TYPE_FMPY))
                {
                  /* If ANCHOR and FLOATER can be combined, then we're
                     done with this pass.  */
                  if (pa_can_combine_p (new_rtx, anchor, floater, 0,
                                        SET_DEST (PATTERN (floater)),
                                        XEXP (SET_SRC (PATTERN (floater)), 0),
                                        XEXP (SET_SRC (PATTERN (floater)), 1)))
                    break;
                }
              else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
                       && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
                {
                  if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
                    {
                      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
                                            SET_DEST (PATTERN (floater)),
                                            XEXP (SET_SRC (PATTERN (floater)), 0),
                                            XEXP (SET_SRC (PATTERN (floater)), 1)))
                        break;
                    }
                  else
                    {
                      if (pa_can_combine_p (new_rtx, anchor, floater, 0,
                                            SET_DEST (PATTERN (floater)),
                                            SET_SRC (PATTERN (floater)),
                                            SET_SRC (PATTERN (floater))))
                        break;
                    }
                }
            }

          /* If we didn't find anything on the backwards scan try forwards.  */
          if (!floater
              && (anchor_attr == PA_COMBINE_TYPE_FMPY
                  || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
            {
              for (floater = anchor; floater; floater = NEXT_INSN (floater))
                {
                  if (GET_CODE (floater) == NOTE
                      || (GET_CODE (floater) == INSN
                          && (GET_CODE (PATTERN (floater)) == USE
                              || GET_CODE (PATTERN (floater)) == CLOBBER)))
                    continue;

                  /* Anything except a regular INSN will stop our search.  */
                  if (GET_CODE (floater) != INSN
                      || GET_CODE (PATTERN (floater)) == ADDR_VEC
                      || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
                    {
                      floater = NULL_RTX;
                      break;
                    }

                  /* See if FLOATER is suitable for combination with the
                     anchor.  */
                  floater_attr = get_attr_pa_combine_type (floater);
                  if ((anchor_attr == PA_COMBINE_TYPE_FMPY
                       && floater_attr == PA_COMBINE_TYPE_FADDSUB)
                      || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
                          && floater_attr == PA_COMBINE_TYPE_FMPY))
                    {
                      /* If ANCHOR and FLOATER can be combined, then we're
                         done with this pass.  */
                      if (pa_can_combine_p (new_rtx, anchor, floater, 1,
                                            SET_DEST (PATTERN (floater)),
                                            XEXP (SET_SRC (PATTERN (floater)),
                                                  0),
                                            XEXP (SET_SRC (PATTERN (floater)),
                                                  1)))
                        break;
                    }
                }
            }

          /* FLOATER will be nonzero if we found a suitable floating
             insn for combination with ANCHOR.  */
          if (floater
              && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
                  || anchor_attr == PA_COMBINE_TYPE_FMPY))
            {
              /* Emit the new instruction and delete the old anchor.  */
              emit_insn_before (gen_rtx_PARALLEL
                                (VOIDmode,
                                 gen_rtvec (2, PATTERN (anchor),
                                            PATTERN (floater))),
                                anchor);

              SET_INSN_DELETED (anchor);

              /* Emit a special USE insn for FLOATER, then delete
                 the floating insn.  */
              emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
              delete_insn (floater);

              continue;
            }
          else if (floater
                   && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
            {
              rtx temp;
              /* Emit the new_jump instruction and delete the old anchor.  */
              temp
                = emit_jump_insn_before (gen_rtx_PARALLEL
                                         (VOIDmode,
                                          gen_rtvec (2, PATTERN (anchor),
                                                     PATTERN (floater))),
                                         anchor);

              JUMP_LABEL (temp) = JUMP_LABEL (anchor);
              SET_INSN_DELETED (anchor);

              /* Emit a special USE insn for FLOATER, then delete
                 the floating insn.  */
              emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
              delete_insn (floater);
              continue;
            }
        }
    }
}

static int
pa_can_combine_p (rtx new_rtx, rtx anchor, rtx floater, int reversed, rtx dest,
                  rtx src1, rtx src2)
{
  int insn_code_number;
  rtx start, end;

  /* Create a PARALLEL with the patterns of ANCHOR and
     FLOATER, try to recognize it, then test constraints
     for the resulting pattern.

     If the pattern doesn't match or the constraints
     aren't met keep searching for a suitable floater
     insn.  */
  XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
  XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
  INSN_CODE (new_rtx) = -1;
  insn_code_number = recog_memoized (new_rtx);
  if (insn_code_number < 0
      || (extract_insn (new_rtx), ! constrain_operands (1)))
    return 0;

  if (reversed)
    {
      start = anchor;
      end = floater;
    }
  else
    {
      start = floater;
      end = anchor;
    }

  /* There are up to three operands to consider.  One
     output and two inputs.

     The output must not be used between FLOATER & ANCHOR
     exclusive.  The inputs must not be set between
     FLOATER and ANCHOR exclusive.  */

  if (reg_used_between_p (dest, start, end))
    return 0;

  if (reg_set_between_p (src1, start, end))
    return 0;

  if (reg_set_between_p (src2, start, end))
    return 0;

  /* If we get here, then everything is good.  */
  return 1;
}
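
/* For illustration (a sketch with hypothetical registers): on a PA1.1
   target an FP multiply such as

       fmpy,dbl %fr5,%fr6,%fr7

   and an independent FP add

       fadd,dbl %fr8,%fr9,%fr10

   can be recognized together as one two-output fmpyadd insn.  The
   combination is attempted above by packing the two SET patterns into
   the scratch PARALLEL and running recog_memoized over it, then
   checking that neither insn's operands are disturbed between the two
   original locations.  */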

/* Return nonzero if references for INSN are delayed.

   Millicode insns are actually function calls with some special
   constraints on arguments and register usage.

   Millicode calls always expect their arguments in the integer argument
   registers, and always return their result in %r29 (ret1).  They
   are expected to clobber their arguments, %r1, %r29, and the return
   pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.

   This function tells reorg that the references to arguments and
   millicode calls do not appear to happen until after the millicode call.
   This allows reorg to put insns which set the argument registers into the
   delay slot of the millicode call -- thus they act more like traditional
   CALL_INSNs.

   Note we cannot consider side effects of the insn to be delayed because
   the branch and link insn will clobber the return pointer.  If we happened
   to use the return pointer in the delay slot of the call, then we lose.

   get_attr_type will try to recognize the given insn, so make sure to
   filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
   in particular.  */
int
insn_refs_are_delayed (rtx insn)
{
  return ((GET_CODE (insn) == INSN
           && GET_CODE (PATTERN (insn)) != SEQUENCE
           && GET_CODE (PATTERN (insn)) != USE
           && GET_CODE (PATTERN (insn)) != CLOBBER
           && get_attr_type (insn) == TYPE_MILLI));
}

/* Promote the return value, but not the arguments.  */

static enum machine_mode
pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
                          enum machine_mode mode,
                          int *punsignedp ATTRIBUTE_UNUSED,
                          const_tree fntype ATTRIBUTE_UNUSED,
                          int for_return)
{
  if (for_return == 0)
    return mode;
  return promote_mode (type, mode, punsignedp);
}

/* On the HP-PA the value is found in register(s) 28(-29), unless
   the mode is SF or DF.  Then the value is returned in fr4 (32).

   This must perform the same promotions as PROMOTE_MODE, else promoting
   return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.

   Small structures must be returned in a PARALLEL on PA64 in order
   to match the HP Compiler ABI.  */

rtx
pa_function_value (const_tree valtype,
                   const_tree func ATTRIBUTE_UNUSED,
                   bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode valmode;

  if (AGGREGATE_TYPE_P (valtype)
      || TREE_CODE (valtype) == COMPLEX_TYPE
      || TREE_CODE (valtype) == VECTOR_TYPE)
    {
      if (TARGET_64BIT)
        {
          /* Aggregates with a size less than or equal to 128 bits are
             returned in GR 28(-29).  They are left justified.  The pad
             bits are undefined.  Larger aggregates are returned in
             memory.  */
          rtx loc[2];
          int i, offset = 0;
          int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;

          for (i = 0; i < ub; i++)
            {
              loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                          gen_rtx_REG (DImode, 28 + i),
                                          GEN_INT (offset));
              offset += 8;
            }

          return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
        }
      else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
        {
          /* Aggregates 5 to 8 bytes in size are returned in general
             registers r28-r29 in the same manner as other non
             floating-point objects.  The data is right-justified and
             zero-extended to 64 bits.  This is opposite to the normal
             justification used on big endian targets and requires
             special treatment.  */
          rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
                                       gen_rtx_REG (DImode, 28), const0_rtx);
          return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
        }
    }

  if ((INTEGRAL_TYPE_P (valtype)
       && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
      || POINTER_TYPE_P (valtype))
    valmode = word_mode;
  else
    valmode = TYPE_MODE (valtype);

  if (TREE_CODE (valtype) == REAL_TYPE
      && !AGGREGATE_TYPE_P (valtype)
      && TYPE_MODE (valtype) != TFmode
      && !TARGET_SOFT_FLOAT)
    return gen_rtx_REG (valmode, 32);

  return gen_rtx_REG (valmode, 28);
}
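
/* Illustrative examples (a sketch, not exhaustive): an `int' is
   returned in %r28; a `long long' on the 32-bit target comes back in
   the %r28-%r29 pair; a `double' is returned in fr4 (register 32)
   unless soft-float is in effect; and on PA64 a 16-byte struct is
   returned left-justified in a PARALLEL over registers 28 and 29.  */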

/* Return the location of a parameter that is passed in a register or NULL
   if the parameter has any component that is passed in memory.

   This is new code and will be pushed into the net sources after
   further testing.

   ??? We might want to restructure this so that it looks more like other
   ports.  */
rtx
function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
              int named ATTRIBUTE_UNUSED)
{
  int max_arg_words = (TARGET_64BIT ? 8 : 4);
  int alignment = 0;
  int arg_size;
  int fpr_reg_base;
  int gpr_reg_base;
  rtx retval;

  if (mode == VOIDmode)
    return NULL_RTX;

  arg_size = FUNCTION_ARG_SIZE (mode, type);

  /* If this arg would be passed partially or totally on the stack, then
     this routine should return zero.  pa_arg_partial_bytes will
     handle arguments which are split between regs and stack slots if
     the ABI mandates split arguments.  */
  if (!TARGET_64BIT)
    {
      /* The 32-bit ABI does not split arguments.  */
      if (cum->words + arg_size > max_arg_words)
        return NULL_RTX;
    }
  else
    {
      if (arg_size > 1)
        alignment = cum->words & 1;
      if (cum->words + alignment >= max_arg_words)
        return NULL_RTX;
    }

  /* The 32bit ABIs and the 64bit ABIs are rather different,
     particularly in their handling of FP registers.  We might
     be able to cleverly share code between them, but I'm not
     going to bother in the hope that splitting them up results
     in code that is more easily understood.  */

  if (TARGET_64BIT)
    {
      /* Advance the base registers to their current locations.

         Remember, gprs grow towards smaller register numbers while
         fprs grow to higher register numbers.  Also remember that
         although FP regs are 32-bit addressable, we pretend that
         the registers are 64-bits wide.  */
      gpr_reg_base = 26 - cum->words;
      fpr_reg_base = 32 + cum->words;

      /* Arguments wider than one word and small aggregates need special
         treatment.  */
      if (arg_size > 1
          || mode == BLKmode
          || (type && (AGGREGATE_TYPE_P (type)
                       || TREE_CODE (type) == COMPLEX_TYPE
                       || TREE_CODE (type) == VECTOR_TYPE)))
        {
          /* Double-extended precision (80-bit), quad-precision (128-bit)
             and aggregates including complex numbers are aligned on
             128-bit boundaries.  The first eight 64-bit argument slots
             are associated one-to-one, with general registers r26
             through r19, and also with floating-point registers fr4
             through fr11.  Arguments larger than one word are always
             passed in general registers.

             Using a PARALLEL with a word mode register results in left
             justified data on a big-endian target.  */

          rtx loc[8];
          int i, offset = 0, ub = arg_size;

          /* Align the base register.  */
          gpr_reg_base -= alignment;

          ub = MIN (ub, max_arg_words - cum->words - alignment);
          for (i = 0; i < ub; i++)
            {
              loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                          gen_rtx_REG (DImode, gpr_reg_base),
                                          GEN_INT (offset));
              gpr_reg_base -= 1;
              offset += 8;
            }

          return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
        }
    }
  else
    {
      /* If the argument is larger than a word, then we know precisely
         which registers we must use.  */
      if (arg_size > 1)
        {
          if (cum->words)
            {
              gpr_reg_base = 23;
              fpr_reg_base = 38;
            }
          else
            {
              gpr_reg_base = 25;
              fpr_reg_base = 34;
            }

          /* Structures 5 to 8 bytes in size are passed in the general
             registers in the same manner as other non floating-point
             objects.  The data is right-justified and zero-extended
             to 64 bits.  This is opposite to the normal justification
             used on big endian targets and requires special treatment.
             We now define BLOCK_REG_PADDING to pad these objects.
             Aggregates, complex and vector types are passed in the same
             manner as structures.  */
          if (mode == BLKmode
              || (type && (AGGREGATE_TYPE_P (type)
                           || TREE_CODE (type) == COMPLEX_TYPE
                           || TREE_CODE (type) == VECTOR_TYPE)))
            {
              rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
                                           gen_rtx_REG (DImode, gpr_reg_base),
                                           const0_rtx);
              return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
            }
        }
      else
        {
          /* We have a single word (32 bits).  A simple computation
             will get us the register #s we need.  */
          gpr_reg_base = 26 - cum->words;
          fpr_reg_base = 32 + 2 * cum->words;
        }
    }

  /* Determine if the argument needs to be passed in both general and
     floating point registers.  */
  if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
       /* If we are doing soft-float with portable runtime, then there
          is no need to worry about FP regs.  */
       && !TARGET_SOFT_FLOAT
       /* The parameter must be some kind of scalar float, else we just
          pass it in integer registers.  */
       && GET_MODE_CLASS (mode) == MODE_FLOAT
       /* The target function must not have a prototype.  */
       && cum->nargs_prototype <= 0
       /* libcalls do not need to pass items in both FP and general
          registers.  */
       && type != NULL_TREE
       /* All this hair applies to "outgoing" args only.  This includes
          sibcall arguments setup with FUNCTION_INCOMING_ARG.  */
       && !cum->incoming)
      /* Also pass outgoing floating arguments in both registers in indirect
         calls with the 32 bit ABI and the HP assembler since there is no
         way to specify the argument locations in static functions.  */
      || (!TARGET_64BIT
          && !TARGET_GAS
          && !cum->incoming
          && cum->indirect
          && GET_MODE_CLASS (mode) == MODE_FLOAT))
    {
      retval
        = gen_rtx_PARALLEL
            (mode,
             gen_rtvec (2,
                        gen_rtx_EXPR_LIST (VOIDmode,
                                           gen_rtx_REG (mode, fpr_reg_base),
                                           const0_rtx),
                        gen_rtx_EXPR_LIST (VOIDmode,
                                           gen_rtx_REG (mode, gpr_reg_base),
                                           const0_rtx)));
    }
  else
    {
      /* See if we should pass this parameter in a general register.  */
      if (TARGET_SOFT_FLOAT
          /* Indirect calls in the normal 32bit ABI require all arguments
             to be passed in general registers.  */
          || (!TARGET_PORTABLE_RUNTIME
              && !TARGET_64BIT
              && !TARGET_ELF32
              && cum->indirect)
          /* If the parameter is not a scalar floating-point parameter,
             then it belongs in GPRs.  */
          || GET_MODE_CLASS (mode) != MODE_FLOAT
          /* Structure with single SFmode field belongs in GPR.  */
          || (type && AGGREGATE_TYPE_P (type)))
        retval = gen_rtx_REG (mode, gpr_reg_base);
      else
        retval = gen_rtx_REG (mode, fpr_reg_base);
    }
  return retval;
}
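
/* Illustrative mapping under the 32-bit ABI (a sketch): the four
   argument words correspond to %r26, %r25, %r24 and %r23, so a second
   one-word integer argument lands in %r25, while a `double' occupying
   the first two slots is passed as (reg:DF 25), i.e. the %r25/%r26
   pair, or in FP register 34 (fr5) when FP registers are used.  */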

/* If this arg would be passed totally in registers or totally on the stack,
   then this routine should return zero.  */

static int
pa_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
                      tree type, bool named ATTRIBUTE_UNUSED)
{
  unsigned int max_arg_words = 8;
  unsigned int offset = 0;

  if (!TARGET_64BIT)
    return 0;

  if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
    offset = 1;

  if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
    /* Arg fits fully into registers.  */
    return 0;
  else if (cum->words + offset >= max_arg_words)
    /* Arg fully on the stack.  */
    return 0;
  else
    /* Arg is split.  */
    return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
}
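
/* Worked example (hypothetical values): on the 64-bit target with six
   argument words already used (cum->words == 6), a four-word (32-byte)
   aggregate neither fits in the two remaining register slots nor
   starts on the stack, so the function returns
   (8 - 6 - 0) * UNITS_PER_WORD = 16 bytes passed in registers, with
   the remaining 16 bytes going to the stack.  */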

/* A get_unnamed_section callback for switching to the text section.

   This function is only used with SOM.  Because we don't support
   named subspaces, we can only create a new subspace or switch back
   to the default text subspace.  */

static void
som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
{
  gcc_assert (TARGET_SOM);
  if (TARGET_GAS)
    {
      if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
        {
          /* We only want to emit a .nsubspa directive once at the
             start of the function.  */
          cfun->machine->in_nsubspa = 1;

          /* Create a new subspace for the text.  This provides
             better stub placement and one-only functions.  */
          if (cfun->decl
              && DECL_ONE_ONLY (cfun->decl)
              && !DECL_WEAK (cfun->decl))
            {
              output_section_asm_op ("\t.SPACE $TEXT$\n"
                                     "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
                                     "ACCESS=44,SORT=24,COMDAT");
              return;
            }
        }
      else
        {
          /* There isn't a current function or the body of the current
             function has been completed.  So, we are changing to the
             text section to output debugging information.  Thus, we
             need to forget that we are in the text section so that
             varasm.c will call us when text_section is selected again.  */
          gcc_assert (!cfun || !cfun->machine
                      || cfun->machine->in_nsubspa == 2);
          in_section = NULL;
        }
      output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
      return;
    }
  output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
}

/* A get_unnamed_section callback for switching to comdat data
   sections.  This function is only used with SOM.  */

static void
som_output_comdat_data_section_asm_op (const void *data)
{
  in_section = NULL;
  output_section_asm_op (data);
}

/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
pa_som_asm_init_sections (void)
{
  text_section
    = get_unnamed_section (0, som_output_text_section_asm_op, NULL);

  /* SOM puts readonly data in the default $LIT$ subspace when PIC code
     is not being generated.  */
  som_readonly_data_section
    = get_unnamed_section (0, output_section_asm_op,
                           "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");

  /* When secondary definitions are not supported, SOM makes readonly
     data one-only by creating a new $LIT$ subspace in $TEXT$ with
     the comdat flag.  */
  som_one_only_readonly_data_section
    = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
                           "\t.SPACE $TEXT$\n"
                           "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
                           "ACCESS=0x2c,SORT=16,COMDAT");

  /* When secondary definitions are not supported, SOM makes data one-only
     by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag.  */
  som_one_only_data_section
    = get_unnamed_section (SECTION_WRITE,
                           som_output_comdat_data_section_asm_op,
                           "\t.SPACE $PRIVATE$\n"
                           "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
                           "ACCESS=31,SORT=24,COMDAT");

  /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
     which reference data within the $TEXT$ space (for example constant
     strings in the $LIT$ subspace).

     The assemblers (GAS and HP as) both have problems with handling
     the difference of two symbols which is the other correct way to
     reference constant data during PIC code generation.

     So, there's no way to reference constant data which is in the
     $TEXT$ space during PIC generation.  Instead place all constant
     data into the $PRIVATE$ subspace (this reduces sharing, but it
     works correctly).  */
  readonly_data_section = flag_pic ? data_section : som_readonly_data_section;

  /* We must not have a reference to an external symbol defined in a
     shared library in a readonly section, else the SOM linker will
     complain.

     So, we force exception information into the data section.  */
  exception_section = data_section;
}

/* On hpux10, the linker will give an error if we have a reference
   in the read-only data section to a symbol defined in a shared
   library.  Therefore, expressions that might require a reloc
   cannot be placed in the read-only data section.  */

static section *
pa_select_section (tree exp, int reloc,
                   unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (exp) == VAR_DECL
      && TREE_READONLY (exp)
      && !TREE_THIS_VOLATILE (exp)
      && DECL_INITIAL (exp)
      && (DECL_INITIAL (exp) == error_mark_node
          || TREE_CONSTANT (DECL_INITIAL (exp)))
      && !reloc)
    {
      if (TARGET_SOM
          && DECL_ONE_ONLY (exp)
          && !DECL_WEAK (exp))
        return som_one_only_readonly_data_section;
      else
        return readonly_data_section;
    }
  else if (CONSTANT_CLASS_P (exp) && !reloc)
    return readonly_data_section;
  else if (TARGET_SOM
           && TREE_CODE (exp) == VAR_DECL
           && DECL_ONE_ONLY (exp)
           && !DECL_WEAK (exp))
    return som_one_only_data_section;
  else
    return data_section;
}

static void
pa_globalize_label (FILE *stream, const char *name)
{
  /* We only handle DATA objects here, functions are globalized in
     ASM_DECLARE_FUNCTION_NAME.  */
  if (! FUNCTION_NAME_P (name))
    {
      fputs ("\t.EXPORT ", stream);
      assemble_name (stream, name);
      fputs (",DATA\n", stream);
    }
}

/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
                     int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
}

/* Worker function for TARGET_RETURN_IN_MEMORY.  */

bool
pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  /* The SOM ABI says that objects larger than 64 bits are returned in memory.
     The PA64 ABI says that objects larger than 128 bits are returned in
     memory.  Note, int_size_in_bytes can return -1 if the size of the object
     is variable or larger than the maximum value that can be expressed as
     a HOST_WIDE_INT.  It can also return zero for an empty type.  The
     simplest way to handle variable and empty types is to pass them in
     memory.  This avoids problems in defining the boundaries of argument
     slots, allocating registers, etc.  */
  return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
          || int_size_in_bytes (type) <= 0);
}
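
/* Examples (a sketch): a 12-byte struct is returned in memory on the
   32-bit target (12 > 8) but in registers on PA64 (12 <= 16); a
   variable-sized object, for which int_size_in_bytes returns -1, is
   always returned in memory.  */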

/* Structure to hold declaration and name of external symbols that are
   emitted by GCC.  We generate a vector of these symbols and output them
   at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
   This avoids putting out names that are never really used.  */

typedef struct GTY(()) extern_symbol
{
  tree decl;
  const char *name;
} extern_symbol;

/* Define gc'd vector type for extern_symbol.  */
DEF_VEC_O(extern_symbol);
DEF_VEC_ALLOC_O(extern_symbol,gc);

/* Vector of extern_symbol pointers.  */
static GTY(()) VEC(extern_symbol,gc) *extern_symbols;

#ifdef ASM_OUTPUT_EXTERNAL_REAL
/* Mark DECL (name NAME) as an external reference (assembler output
   file FILE).  This saves the names to output at the end of the file
   if actually referenced.  */

void
pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
{
  extern_symbol * p = VEC_safe_push (extern_symbol, gc, extern_symbols, NULL);

  gcc_assert (file == asm_out_file);
  p->decl = decl;
  p->name = name;
}

/* Output text required at the end of an assembler file.
   This includes deferred plabels and .import directives for
   all external symbols that were actually referenced.  */

static void
pa_hpux_file_end (void)
{
  unsigned int i;
  extern_symbol *p;

  if (!NO_DEFERRED_PROFILE_COUNTERS)
    output_deferred_profile_counters ();

  output_deferred_plabels ();

  for (i = 0; VEC_iterate (extern_symbol, extern_symbols, i, p); i++)
    {
      tree decl = p->decl;

      if (!TREE_ASM_WRITTEN (decl)
          && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
        ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
    }

  VEC_free (extern_symbol, gc, extern_symbols);
}
#endif

/* Return true if a change from mode FROM to mode TO for a register
   in register class RCLASS is invalid.  */

bool
pa_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
                             enum reg_class rclass)
{
  if (from == to)
    return false;

  /* Reject changes to/from complex and vector modes.  */
  if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
      || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
    return true;

  if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
    return false;

  /* There is no way to load QImode or HImode values directly from
     memory.  SImode loads to the FP registers are not zero extended.
     On the 64-bit target, this conflicts with the definition of
     LOAD_EXTEND_OP.  Thus, we can't allow changing between modes
     with different sizes in the floating-point registers.  */
  if (MAYBE_FP_REG_CLASS_P (rclass))
    return true;

  /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
     in specific sets of registers.  Thus, we cannot allow changing
     to a larger mode when it's larger than a word.  */
  if (GET_MODE_SIZE (to) > UNITS_PER_WORD
      && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
    return true;

  return false;
}
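
/* Examples (a sketch): an SFmode-to-DFmode change is rejected for any
   class that may include FP registers because the sizes differ, while
   SImode to SFmode (both 4 bytes) is allowed in any class.  */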

/* Returns TRUE if it is a good idea to tie two pseudo registers
   when one has mode MODE1 and one has mode MODE2.
   If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
   for any hard reg, then this must be FALSE for correct output.

   We should return FALSE for QImode and HImode because these modes
   are not ok in the floating-point registers.  However, this prevents
   tying these modes to SImode and DImode in the general registers.
   So, this isn't a good idea.  We rely on HARD_REGNO_MODE_OK and
   CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
   in the floating-point registers.  */

bool
pa_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  /* Don't tie modes in different classes.  */
  if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
    return false;

  return true;
}


/* Length in units of the trampoline instruction code.  */

#define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))


/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
   and then branches to the specified routine.

   This code template is copied from text segment to stack location
   and then patched with pa_trampoline_init to contain valid values,
   and then entered as a subroutine.

   It is best to keep this as small as possible to avoid having to
   flush multiple lines in the cache.  */

static void
pa_asm_trampoline_template (FILE *f)
{
  if (!TARGET_64BIT)
    {
      fputs ("\tldw     36(%r22),%r21\n", f);
      fputs ("\tbb,>=,n %r21,30,.+16\n", f);
      if (ASSEMBLER_DIALECT == 0)
        fputs ("\tdepi  0,31,2,%r21\n", f);
      else
        fputs ("\tdepwi 0,31,2,%r21\n", f);
      fputs ("\tldw     4(%r21),%r19\n", f);
      fputs ("\tldw     0(%r21),%r21\n", f);
      if (TARGET_PA_20)
        {
          fputs ("\tbve (%r21)\n", f);
          fputs ("\tldw 40(%r22),%r29\n", f);
          fputs ("\t.word       0\n", f);
          fputs ("\t.word       0\n", f);
        }
      else
        {
          fputs ("\tldsid       (%r21),%r1\n", f);
          fputs ("\tmtsp        %r1,%sr0\n", f);
          fputs ("\tbe  0(%sr0,%r21)\n", f);
          fputs ("\tldw 40(%r22),%r29\n", f);
        }
      fputs ("\t.word   0\n", f);
      fputs ("\t.word   0\n", f);
      fputs ("\t.word   0\n", f);
      fputs ("\t.word   0\n", f);
    }
  else
    {
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\t.dword 0\n", f);
      fputs ("\tmfia    %r31\n", f);
      fputs ("\tldd     24(%r31),%r1\n", f);
      fputs ("\tldd     24(%r1),%r27\n", f);
      fputs ("\tldd     16(%r1),%r1\n", f);
      fputs ("\tbve     (%r1)\n", f);
      fputs ("\tldd     32(%r31),%r31\n", f);
      fputs ("\t.dword 0  ; fptr\n", f);
      fputs ("\t.dword 0  ; static link\n", f);
    }
}

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.

   Move the function address to the trampoline template at offset 36.
   Move the static chain value to the trampoline template at offset 40.
   Move the trampoline address to the trampoline template at offset 44.
   Move r19 to the trampoline template at offset 48.  The latter two
   words create a plabel for the indirect call to the trampoline.

   A similar sequence is used for the 64-bit port but the plabel is
   at the beginning of the trampoline.

   Finally, the cache entries for the trampoline code are flushed.
   This is necessary to ensure that the trampoline instruction sequence
   is written to memory prior to any attempts at prefetching the code
   sequence.  */

static void
pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx start_addr = gen_reg_rtx (Pmode);
  rtx end_addr = gen_reg_rtx (Pmode);
  rtx line_length = gen_reg_rtx (Pmode);
  rtx r_tramp, tmp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
                   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
  r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));

  if (!TARGET_64BIT)
    {
      tmp = adjust_address (m_tramp, Pmode, 36);
      emit_move_insn (tmp, fnaddr);
      tmp = adjust_address (m_tramp, Pmode, 40);
      emit_move_insn (tmp, chain_value);

      /* Create a fat pointer for the trampoline.  */
      tmp = adjust_address (m_tramp, Pmode, 44);
      emit_move_insn (tmp, r_tramp);
      tmp = adjust_address (m_tramp, Pmode, 48);
      emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));

      /* fdc and fic only use registers for the address to flush,
         they do not accept integer displacements.  We align the
         start and end addresses to the beginning of their respective
         cache lines to minimize the number of lines flushed.  */
      emit_insn (gen_andsi3 (start_addr, r_tramp,
                             GEN_INT (-MIN_CACHELINE_SIZE)));
      tmp = force_reg (Pmode, plus_constant (r_tramp, TRAMPOLINE_CODE_SIZE-1));
      emit_insn (gen_andsi3 (end_addr, tmp,
                             GEN_INT (-MIN_CACHELINE_SIZE)));
      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
      emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
      emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
                                    gen_reg_rtx (Pmode),
                                    gen_reg_rtx (Pmode)));
    }
  else
    {
      tmp = adjust_address (m_tramp, Pmode, 56);
      emit_move_insn (tmp, fnaddr);
      tmp = adjust_address (m_tramp, Pmode, 64);
      emit_move_insn (tmp, chain_value);

      /* Create a fat pointer for the trampoline.  */
      tmp = adjust_address (m_tramp, Pmode, 16);
      emit_move_insn (tmp, force_reg (Pmode, plus_constant (r_tramp, 32)));
      tmp = adjust_address (m_tramp, Pmode, 24);
      emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));

      /* fdc and fic only use registers for the address to flush,
         they do not accept integer displacements.  We align the
         start and end addresses to the beginning of their respective
         cache lines to minimize the number of lines flushed.  */
      tmp = force_reg (Pmode, plus_constant (r_tramp, 32));
      emit_insn (gen_anddi3 (start_addr, tmp,
                             GEN_INT (-MIN_CACHELINE_SIZE)));
      tmp = force_reg (Pmode, plus_constant (tmp, TRAMPOLINE_CODE_SIZE - 1));
      emit_insn (gen_anddi3 (end_addr, tmp,
                             GEN_INT (-MIN_CACHELINE_SIZE)));
      emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
      emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
      emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
                                    gen_reg_rtx (Pmode),
                                    gen_reg_rtx (Pmode)));
    }
}

/* Perform any machine-specific adjustment in the address of the trampoline.
   ADDR contains the address that was passed to pa_trampoline_init.
   Adjust the trampoline address to point to the plabel at offset 44.  */

static rtx
pa_trampoline_adjust_address (rtx addr)
{
  if (!TARGET_64BIT)
    addr = memory_address (Pmode, plus_constant (addr, 46));
  return addr;
}

static rtx
pa_delegitimize_address (rtx orig_x)
{
  rtx x = delegitimize_mem_from_attrs (orig_x);

  if (GET_CODE (x) == LO_SUM
      && GET_CODE (XEXP (x, 1)) == UNSPEC
      && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
    return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
  return x;
}

#include "gt-pa.h"
