openrisc/trunk/gnu-old/gcc-4.2.2/gcc/config/pa/pa.c (rev 816)
https://opencores.org/ocsvn/openrisc/openrisc/trunk
/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "tree.h"
#include "output.h"
#include "except.h"
#include "expr.h"
#include "optabs.h"
#include "reload.h"
#include "integrate.h"
#include "function.h"
#include "toplev.h"
#include "ggc.h"
#include "recog.h"
#include "predict.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"

/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
{
  enum machine_mode store_mode;
  enum machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || get_attr_type (in_insn) != TYPE_FPSTORE
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}


#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif

static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static bool pa_handle_option (size_t, const char *, int);
static int hppa_address_cost (rtx);
static bool hppa_rtx_costs (rtx, int, int, int *);
static inline rtx force_mode (enum machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
static int forward_branch_p (rtx);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx);
static int compute_clrmem_length (rtx);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
static void update_total_code_bytes (int);
static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
static int pa_adjust_cost (rtx, rtx, rtx, int);
static int pa_adjust_priority (rtx, int);
static int pa_issue_rate (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx hppa_builtin_saveregs (void);
static tree hppa_gimplify_va_arg_expr (tree, tree, tree *, tree *);
static bool pa_scalar_mode_supported_p (enum machine_mode);
static bool pa_commutative_p (rtx x, int outer_code);
static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
static void pa_hpux_file_end (void);
#endif
#ifdef HPUX_LONG_DOUBLE_LIBRARY
static void pa_hpux_init_libfuncs (void);
#endif
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
                                  tree, bool);
static int pa_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
                                 tree, bool);
static struct machine_function * pa_init_machine_status (void);
static enum reg_class pa_secondary_reload (bool, rtx, enum reg_class,
                                           enum machine_mode,
                                           secondary_reload_info *);


/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;

/* Save the operands last given to a compare for use when we
   generate a scc or bcc insn.  */
rtx hppa_compare_op0, hppa_compare_op1;
enum cmp_type hppa_branch_type;

/* Which cpu we are scheduling for.  */
enum processor_type pa_cpu = TARGET_SCHED_DEFAULT;

/* The UNIX standard to use for predefines and linking.  */
int flag_pa_unix = TARGET_HPUX_11_11 ? 1998 : TARGET_HPUX_10_10 ? 1995 : 1993;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct deferred_plabel GTY(())
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;


/* Initialize the GCC target structure.  */

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#ifdef ASM_OUTPUT_EXTERNAL_REAL
#define TARGET_ASM_FILE_END pa_hpux_file_end
#else
#define TARGET_ASM_FILE_END output_deferred_plabels
#endif

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION pa_handle_option

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#ifdef HPUX_LONG_DOUBLE_LIBRARY
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
#endif

#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_tls_referenced_p

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload

struct gcc_target targetm = TARGET_INITIALIZER;

/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
        {
          warning (0, "value of -mfixed-range must have form REG1-REG2");
          return;
        }
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
        *comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
        {
          warning (0, "unknown register name: %s", str);
          return;
        }

      last = decode_reg_name (dash + 1);
      if (last < 0)
        {
          warning (0, "unknown register name: %s", dash + 1);
          return;
        }

      *dash = '-';

      if (first > last)
        {
          warning (0, "%s-%s is an empty range", str, dash + 1);
          return;
        }

      for (i = first; i <= last; ++i)
        fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
        break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}
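
/* For example, given -mfixed-range=fr4-fr31 (the kernel-mode case mentioned
   above), the loop splits the string at '-', decodes "fr4" and "fr31" with
   decode_reg_name, and sets fixed_regs[i] = call_used_regs[i] = 1 for every
   register in that range; a comma-separated list repeats this per range.  */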

/* Implement TARGET_HANDLE_OPTION.  */

static bool
pa_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_mnosnake:
    case OPT_mpa_risc_1_0:
    case OPT_march_1_0:
      target_flags &= ~(MASK_PA_11 | MASK_PA_20);
      return true;

    case OPT_msnake:
    case OPT_mpa_risc_1_1:
    case OPT_march_1_1:
      target_flags &= ~MASK_PA_20;
      target_flags |= MASK_PA_11;
      return true;

    case OPT_mpa_risc_2_0:
    case OPT_march_2_0:
      target_flags |= MASK_PA_11 | MASK_PA_20;
      return true;

    case OPT_mschedule_:
      if (strcmp (arg, "8000") == 0)
        pa_cpu = PROCESSOR_8000;
      else if (strcmp (arg, "7100") == 0)
        pa_cpu = PROCESSOR_7100;
      else if (strcmp (arg, "700") == 0)
        pa_cpu = PROCESSOR_700;
      else if (strcmp (arg, "7100LC") == 0)
        pa_cpu = PROCESSOR_7100LC;
      else if (strcmp (arg, "7200") == 0)
        pa_cpu = PROCESSOR_7200;
      else if (strcmp (arg, "7300") == 0)
        pa_cpu = PROCESSOR_7300;
      else
        return false;
      return true;

    case OPT_mfixed_range_:
      fix_range (arg);
      return true;

#if TARGET_HPUX
    case OPT_munix_93:
      flag_pa_unix = 1993;
      return true;
#endif

#if TARGET_HPUX_10_10
    case OPT_munix_95:
      flag_pa_unix = 1995;
      return true;
#endif

#if TARGET_HPUX_11_11
    case OPT_munix_98:
      flag_pa_unix = 1998;
      return true;
#endif

    default:
      return true;
    }
}

void
override_options (void)
{
  /* Unconditional branches in the delay slot are not compatible with dwarf2
     call frame information.  There is no benefit in using this optimization
     on PA8000 and later processors.  */
  if (pa_cpu >= PROCESSOR_8000
      || (! USING_SJLJ_EXCEPTIONS && flag_exceptions)
      || flag_unwind_tables)
    target_flags &= ~MASK_JUMP_IN_DELAY;

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "-g is only supported when using GAS on this processor,");
      warning (0, "-g option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}

static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] =
    built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
  implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED]
    = implicit_built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
#endif
}

/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_alloc_cleared (sizeof (machine_function));
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
symbolic_expression_p (rtx x)
{

  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (symbolic_operand (x, VOIDmode));
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
cint_ok_for_move (HOST_WIDE_INT intval)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (CONST_OK_FOR_LETTER_P (intval, 'J')
          || CONST_OK_FOR_LETTER_P (intval, 'N')
          || CONST_OK_FOR_LETTER_P (intval, 'K'));
}
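
/* Per the parallel comment above, 'J' should cover the 14-bit signed
   immediates that ldo can add, 'N' the constants ldil can build, and
   'K' the constants zdepi can deposit (see zdepi_cint_p below); the
   authoritative definitions are CONST_OK_FOR_LETTER_P in pa.h.  */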

/* Return truth value of whether OP can be used as an operand in an
   adddi3 insn.  */
int
adddi3_operand (rtx op, enum machine_mode mode)
{
  return (register_operand (op, mode)
          || (GET_CODE (op) == CONST_INT
              && (TARGET_64BIT ? INT_14_BITS (op) : INT_11_BITS (op))));
}

/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5 bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}
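
/* A worked example of the test above (illustrative only): for
   x = 0x1e0 (binary 1_1110_0000, i.e. the 5-bit field 01111 shifted
   left by 5), lsb_mask = 0x20 and t = (0x1e + 0x20) & ~0x1f = 0x20,
   a power of two, so the function returns 1.  For x = 65 (binary
   100_0001), which no sign-extended 5-bit field can produce,
   t = (4 + 1) & ~0 = 5 and the function returns 0.  */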

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit patterns like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
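
/* To see why this works (illustrative): the complement of an accepted
   mask is a single contiguous run of 1s, possibly extending through the
   most significant bit.  Adding the run's lowest set bit collapses it
   into a single carry bit (or wraps to 0), leaving a power of two.
   E.g. for mask = ~0x18 (the 1..10..01..1 pattern), ~mask = 0x18,
   0x18 + 8 = 0x20, and 0x20 & 0x1f == 0.  */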

/* True iff depi can be used to compute (reg | MASK).  */
int
ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}
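
/* Likewise (illustrative): an accepted MASK is one contiguous run of 1s,
   the field a single depi can set.  For mask = 0x38, mask + 8 = 0x40,
   and 0x40 & 0x3f == 0, so 0x38 is accepted.  */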

/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

rtx
legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      /* We do not want to go through the movXX expanders here since that
         would create recursion.

         Nor do we really want to call a generator for a named pattern
         since that requires multiple patterns if we want to support
         multiple word sizes.

         So instead we just emit the raw set, which avoids the movXX
         expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
      current_function_uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx insn, tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
         result.  This allows the sequence to be deleted when the final
         result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
                 ? reg : gen_reg_rtx (Pmode));

      emit_move_insn (tmp_reg,
                      gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                    gen_rtx_HIGH (word_mode, orig)));
      pic_ref
        = gen_const_mem (Pmode,
                         gen_rtx_LO_SUM (Pmode, tmp_reg,
                                         gen_rtx_UNSPEC (Pmode,
                                                         gen_rtvec (1, orig),
                                                         UNSPEC_DLTIND14R)));

      current_function_uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, orig, REG_NOTES (insn));

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
        return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
        {
          if (INT_14_BITS (orig))
            return plus_constant (base, INTVAL (orig));
          orig = force_reg (Pmode, orig);
        }
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}
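
/* The SYMBOL_REF case above emits the two-insn DLT (linkage table)
   access: a HIGH of the symbol added to the PIC register, then a load
   through a LO_SUM wrapped in UNSPEC_DLTIND14R.  With a 32-bit GAS
   target this presumably assembles to something like
       addil LT'sym,%r19
       ldw RT'sym(%r1),reg
   though the exact relocation syntax depends on the assembler.  */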

static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
                           LCT_CONST, Pmode, 1, arg, Pmode);

  return ret;
}

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, insn, tmp, t1, t2, tp;
  enum tls_model model = SYMBOL_REF_TLS_MODEL (addr);

  switch (model)
    {
      case TLS_MODEL_GLOBAL_DYNAMIC:
        tmp = gen_reg_rtx (Pmode);
        if (flag_pic)
          emit_insn (gen_tgd_load_pic (tmp, addr));
        else
          emit_insn (gen_tgd_load (tmp, addr));
        ret = hppa_tls_call (tmp);
        break;

      case TLS_MODEL_LOCAL_DYNAMIC:
        ret = gen_reg_rtx (Pmode);
        tmp = gen_reg_rtx (Pmode);
        start_sequence ();
        if (flag_pic)
          emit_insn (gen_tld_load_pic (tmp, addr));
        else
          emit_insn (gen_tld_load (tmp, addr));
        t1 = hppa_tls_call (tmp);
        insn = get_insns ();
        end_sequence ();
        t2 = gen_reg_rtx (Pmode);
        emit_libcall_block (insn, t2, t1,
                            gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                            UNSPEC_TLSLDBASE));
        emit_insn (gen_tld_offset_load (ret, addr, t2));
        break;

      case TLS_MODEL_INITIAL_EXEC:
        tp = gen_reg_rtx (Pmode);
        tmp = gen_reg_rtx (Pmode);
        ret = gen_reg_rtx (Pmode);
        emit_insn (gen_tp_load (tp));
        if (flag_pic)
          emit_insn (gen_tie_load_pic (tmp, addr));
        else
          emit_insn (gen_tie_load (tmp, addr));
        emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
        break;

      case TLS_MODEL_LOCAL_EXEC:
        tp = gen_reg_rtx (Pmode);
        ret = gen_reg_rtx (Pmode);
        emit_insn (gen_tp_load (tp));
        emit_insn (gen_tle_load (ret, addr, tp));
        break;

      default:
        gcc_unreachable ();
    }

  return ret;
}
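
/* In outline, the four cases above follow the usual TLS models:
   global-dynamic calls __tls_get_addr on a descriptor for the symbol;
   local-dynamic calls it once for the module base (UNSPEC_TLSLDBASE)
   and adds a link-time local offset; initial-exec loads the thread
   pointer plus an offset fetched at run time; and local-exec adds a
   compile-time offset directly to the thread pointer.  */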

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

        memory(X + <large int>)

   into:

        if (<large int> & mask) >= 16
          Y = (<large int> & ~mask) + mask + 1  Round up.
        else
          Y = (<large int> & ~mask)             Round down.
        Z = X + Y
        memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)


   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
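
/* A concrete example of the transformation (illustrative): for a MODE_INT
   reference to memory (X + 0x12345), mask = 0x3fff and 0x12345 & 0x3fff
   = 0x2345 >= 0x2000, so we round up: Y = 0x14000, Z = X + 0x14000, and
   the reference becomes memory (Z - 0x1cbb), whose displacement fits in
   the 14-bit field of a load or store.  */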

rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
                         enum machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (PA_SYMBOL_REF_TLS_P (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
           && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
          || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
              ? (TARGET_PA_20 ? 0x3fff : 0x1f) : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
         are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
        newoffset = (offset & ~ mask) + mask + 1;
      else
        newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
         handling this would take 4 or 5 instructions (2 to load
         the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
         add the new offset and the SYMBOL_REF.)  Combine can
         not handle 4->2 or 5->2 combinations, so do not create
         them.  */
      if (! VAL_14_BITS_P (newoffset)
          && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
        {
          rtx const_part = plus_constant (XEXP (x, 0), newoffset);
          rtx tmp_reg
            = force_reg (Pmode,
                         gen_rtx_HIGH (Pmode, const_part));
          ptr_reg
            = force_reg (Pmode,
                         gen_rtx_LO_SUM (Pmode,
                                         tmp_reg, const_part));
        }
      else
        {
          if (! VAL_14_BITS_P (newoffset))
            int_part = force_reg (Pmode, GEN_INT (newoffset));
          else
            int_part = GEN_INT (newoffset);

          ptr_reg = force_reg (Pmode,
                               gen_rtx_PLUS (Pmode,
                                             force_reg (Pmode, XEXP (x, 0)),
                                             int_part));
        }
      return plus_constant (ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
      && (OBJECT_P (XEXP (x, 1))
          || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      int val = INTVAL (XEXP (XEXP (x, 0), 1));
      rtx reg1, reg2;

      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode, gen_rtx_PLUS (Pmode,
                                             gen_rtx_MULT (Pmode,
                                                           reg2,
                                                           GEN_INT (val)),
                                             reg1));
    }

  /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
      && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
      && (mode == SFmode || mode == DFmode))
    {

      /* First, try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx, orig_base;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
         then emit_move_sequence will turn on REG_POINTER so we'll know
         it's a base register below.  */
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
          && REG_POINTER (reg1))
        {
          base = reg1;
          orig_base = XEXP (XEXP (x, 0), 1);
          idx = gen_rtx_PLUS (Pmode,
                              gen_rtx_MULT (Pmode,
                                            XEXP (XEXP (XEXP (x, 0), 0), 0),
                                            XEXP (XEXP (XEXP (x, 0), 0), 1)),
                              XEXP (x, 1));
        }
      else if (GET_CODE (reg2) == REG
               && REG_POINTER (reg2))
        {
          base = reg2;
          orig_base = XEXP (x, 1);
          idx = XEXP (x, 0);
        }

      if (base == 0)
        return orig;

      /* If the index adds a large constant, try to scale the
         constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
                            / INTVAL (XEXP (XEXP (idx, 0), 1)))
          && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
        {
          /* Divide the CONST_INT by the scale factor, then add it to A.  */
          int val = INTVAL (XEXP (idx, 1));

          val /= INTVAL (XEXP (XEXP (idx, 0), 1));
          reg1 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg1) != REG)
            reg1 = force_reg (Pmode, force_operand (reg1, 0));

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

          /* We can now generate a simple scaled indexed address.  */
          return
            force_reg
              (Pmode, gen_rtx_PLUS (Pmode,
                                    gen_rtx_MULT (Pmode, reg1,
                                                  XEXP (XEXP (idx, 0), 1)),
                                    base));
        }

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && INTVAL (XEXP (idx, 1)) <= 4096
          && INTVAL (XEXP (idx, 1)) >= -4096)
        {
          int val = INTVAL (XEXP (XEXP (idx, 0), 1));
          rtx reg1, reg2;

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

          reg2 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg2) != CONST_INT)
            reg2 = force_reg (Pmode, force_operand (reg2, 0));

          return force_reg (Pmode, gen_rtx_PLUS (Pmode,
                                                 gen_rtx_MULT (Pmode,
                                                               reg2,
                                                               GEN_INT (val)),
                                                 reg1));
        }

      /* Get the index into a register, then add the base + index and
         return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
                        gen_rtx_PLUS (Pmode,
                                      gen_rtx_MULT (Pmode, reg1,
                                                    XEXP (XEXP (idx, 0), 1)),
                                      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));

    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
         by the index expression is computed first, then added to x to form
         the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
        y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
        {
          /* See if this looks like
                (plus (mult (reg) (shadd_const))
                      (const (plus (symbol_ref) (const_int))))

             Where const_int is small.  In that case the const
             expression is a valid pointer for indexing.

             If const_int is big but can be divided evenly by shadd_const,
             divide it and add the quotient to (reg); this allows more
             scaled indexed addresses.  */
          if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
              && GET_CODE (XEXP (x, 0)) == MULT
              && GET_CODE (XEXP (y, 1)) == CONST_INT
              && INTVAL (XEXP (y, 1)) >= -4096
              && INTVAL (XEXP (y, 1)) <= 4095
              && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
              && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
            {
              int val = INTVAL (XEXP (XEXP (x, 0), 1));
              rtx reg1, reg2;

              reg1 = XEXP (x, 1);
              if (GET_CODE (reg1) != REG)
                reg1 = force_reg (Pmode, force_operand (reg1, 0));

              reg2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (reg2) != REG)
                reg2 = force_reg (Pmode, force_operand (reg2, 0));

              return force_reg (Pmode,
                                gen_rtx_PLUS (Pmode,
                                              gen_rtx_MULT (Pmode,
                                                            reg2,
                                                            GEN_INT (val)),
                                              reg1));
            }
          else if ((mode == DFmode || mode == SFmode)
                   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
                   && GET_CODE (XEXP (x, 0)) == MULT
                   && GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
                   && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
            {
              regx1
                = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
                                             / INTVAL (XEXP (XEXP (x, 0), 1))));
              regx2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (regx2) != REG)
                regx2 = force_reg (Pmode, force_operand (regx2, 0));
              regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                        regx2, regx1));
              return
                force_reg (Pmode,
                           gen_rtx_PLUS (Pmode,
                                         gen_rtx_MULT (Pmode, regx2,
                                                       XEXP (XEXP (x, 0), 1)),
                                         force_reg (Pmode, XEXP (y, 0))));
            }
          else if (GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) >= -4096
                   && INTVAL (XEXP (y, 1)) <= 4095)
            {
              /* This is safe because of the guard page at the
                 beginning and end of the data space.  Just
                 return the original address.  */
              return orig;
            }
          else
            {
              /* Doesn't look like one we can optimize.  */
              regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
              regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
              regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
              regx1 = force_reg (Pmode,
                                 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                 regx1, regy2));
              return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
            }
        }
    }

  return orig;
}

/* For the HPPA, REG, REG+CONST and LO_SUM addresses are cost 1, a bare
   HIGH is cost 2, and everything else (including addresses involving
   symbolic constants) is cost 4.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as GO_IF_LEGITIMATE_ADDRESS.  */

static int
hppa_address_cost (rtx X)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, int code, int outer_code, int *total)
{
  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
        *total = 0;
      else if (INT_14_BITS (x))
        *total = 1;
      else
        *total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
          && outer_code != SET)
        *total = 0;
      else
        *total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        *total = COSTS_N_INSNS (3);
      else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
        *total = COSTS_N_INSNS (8);
      else
        *total = COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (14);
          return true;
        }
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      *total = COSTS_N_INSNS (60);
      return true;

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        *total = COSTS_N_INSNS (3);
      else
        *total = COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}

/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (enum machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}

/* Return 1 if *X is a thread-local symbol.  */

static int
pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  return PA_SYMBOL_REF_TLS_P (*x);
}

/* Return 1 if X contains a thread-local symbol.  */

bool
pa_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0);
}
1318
 
1319
/* Emit insns to move operands[1] into operands[0].
1320
 
1321
   Return 1 if we have written out everything that needs to be done to
1322
   do the move.  Otherwise, return 0 and the caller will emit the move
1323
   normally.
1324
 
1325
   Note SCRATCH_REG may not be in the proper mode depending on how it
1326
   will be used.  This routine is responsible for creating a new copy
1327
   of SCRATCH_REG in the proper mode.  */
1328
 
1329
int
1330
emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
1331
{
1332
  register rtx operand0 = operands[0];
1333
  register rtx operand1 = operands[1];
1334
  register rtx tem;
1335
 
1336
  /* We can only handle indexed addresses in the destination operand
1337
     of floating point stores.  Thus, we need to break out indexed
1338
     addresses from the destination operand.  */
1339
  if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1340
    {
1341
      /* This is only safe up to the beginning of life analysis.  */
1342
      gcc_assert (!no_new_pseudos);
1343
 
1344
      tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1345
      operand0 = replace_equiv_address (operand0, tem);
1346
    }
1347
 
1348
  /* On targets with non-equivalent space registers, break out unscaled
1349
     indexed addresses from the source operand before the final CSE.
1350
     We have to do this because the REG_POINTER flag is not correctly
1351
     carried through various optimization passes and CSE may substitute
1352
     a pseudo without the pointer set for one with the pointer set.  As
1353
     a result, we loose various opportunities to create insns with
1354
     unscaled indexed addresses.  */
1355
  if (!TARGET_NO_SPACE_REGS
1356
      && !cse_not_expected
1357
      && GET_CODE (operand1) == MEM
1358
      && GET_CODE (XEXP (operand1, 0)) == PLUS
1359
      && REG_P (XEXP (XEXP (operand1, 0), 0))
1360
      && REG_P (XEXP (XEXP (operand1, 0), 1)))
1361
    operand1
1362
      = replace_equiv_address (operand1,
1363
                               copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1364
 
1365
  if (scratch_reg
1366
      && reload_in_progress && GET_CODE (operand0) == REG
1367
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1368
    operand0 = reg_equiv_mem[REGNO (operand0)];
1369
  else if (scratch_reg
1370
           && reload_in_progress && GET_CODE (operand0) == SUBREG
1371
           && GET_CODE (SUBREG_REG (operand0)) == REG
1372
           && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
1373
    {
1374
     /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1375
        the code which tracks sets/uses for delete_output_reload.  */
1376
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
1377
                                 reg_equiv_mem [REGNO (SUBREG_REG (operand0))],
1378
                                 SUBREG_BYTE (operand0));
1379
      operand0 = alter_subreg (&temp);
1380
    }
1381
 
1382
  if (scratch_reg
1383
      && reload_in_progress && GET_CODE (operand1) == REG
1384
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
1385
    operand1 = reg_equiv_mem[REGNO (operand1)];
1386
  else if (scratch_reg
1387
           && reload_in_progress && GET_CODE (operand1) == SUBREG
1388
           && GET_CODE (SUBREG_REG (operand1)) == REG
1389
           && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
1390
    {
1391
     /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1392
        the code which tracks sets/uses for delete_output_reload.  */
1393
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1394
                                 reg_equiv_mem [REGNO (SUBREG_REG (operand1))],
1395
                                 SUBREG_BYTE (operand1));
1396
      operand1 = alter_subreg (&temp);
1397
    }
1398
 
1399
  if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1400
      && ((tem = find_replacement (&XEXP (operand0, 0)))
1401
          != XEXP (operand0, 0)))
1402
    operand0 = replace_equiv_address (operand0, tem);
1403
 
1404
  if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1405
      && ((tem = find_replacement (&XEXP (operand1, 0)))
1406
          != XEXP (operand1, 0)))
1407
    operand1 = replace_equiv_address (operand1, tem);
1408
 
1409
  /* Handle secondary reloads for loads/stores of FP registers from
1410
     REG+D addresses where D does not fit in 5 or 14 bits, including
1411
     (subreg (mem (addr))) cases.  */
1412
  if (scratch_reg
1413
      && fp_reg_operand (operand0, mode)
1414
      && ((GET_CODE (operand1) == MEM
1415
           && !memory_address_p ((GET_MODE_SIZE (mode) == 4 ? SFmode : DFmode),
1416
                                 XEXP (operand1, 0)))
1417
          || ((GET_CODE (operand1) == SUBREG
1418
               && GET_CODE (XEXP (operand1, 0)) == MEM
1419
               && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1420
                                      ? SFmode : DFmode),
1421
                                     XEXP (XEXP (operand1, 0), 0))))))
1422
    {
1423
      if (GET_CODE (operand1) == SUBREG)
1424
        operand1 = XEXP (operand1, 0);
1425
 
1426
      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
1427
         it in WORD_MODE regardless of what mode it was originally given
1428
         to us.  */
1429
      scratch_reg = force_mode (word_mode, scratch_reg);
1430
 
1431
      /* D might not fit in 14 bits either; for such cases load D into
1432
         scratch reg.  */
1433
      if (!memory_address_p (Pmode, XEXP (operand1, 0)))
1434
        {
1435
          emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1436
          emit_move_insn (scratch_reg,
1437
                          gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
1438
                                          Pmode,
1439
                                          XEXP (XEXP (operand1, 0), 0),
1440
                                          scratch_reg));
1441
        }
1442
      else
1443
        emit_move_insn (scratch_reg, XEXP (operand1, 0));
1444
      emit_insn (gen_rtx_SET (VOIDmode, operand0,
1445
                              replace_equiv_address (operand1, scratch_reg)));
1446
      return 1;
1447
    }
1448
  else if (scratch_reg
1449
           && fp_reg_operand (operand1, mode)
1450
           && ((GET_CODE (operand0) == MEM
1451
                && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1452
                                        ? SFmode : DFmode),
1453
                                       XEXP (operand0, 0)))
1454
               || ((GET_CODE (operand0) == SUBREG)
1455
                   && GET_CODE (XEXP (operand0, 0)) == MEM
1456
                   && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1457
                                          ? SFmode : DFmode),
1458
                                         XEXP (XEXP (operand0, 0), 0)))))
1459
    {
1460
      if (GET_CODE (operand0) == SUBREG)
1461
        operand0 = XEXP (operand0, 0);
1462
 
1463
      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
1464
         it in WORD_MODE regardless of what mode it was originally given
1465
         to us.  */
1466
      scratch_reg = force_mode (word_mode, scratch_reg);
1467
 
1468
      /* D might not fit in 14 bits either; for such cases load D into
1469
         scratch reg.  */
1470
      if (!memory_address_p (Pmode, XEXP (operand0, 0)))
1471
        {
1472
          emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
1473
          emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
1474
                                                                        0)),
1475
                                                       Pmode,
1476
                                                       XEXP (XEXP (operand0, 0),
1477
                                                                   0),
1478
                                                       scratch_reg));
1479
        }
1480
      else
1481
        emit_move_insn (scratch_reg, XEXP (operand0, 0));
1482
      emit_insn (gen_rtx_SET (VOIDmode,
1483
                              replace_equiv_address (operand0, scratch_reg),
1484
                              operand1));
1485
      return 1;
1486
    }
1487
  /* Handle secondary reloads for loads of FP registers from constant
1488
     expressions by forcing the constant into memory.
1489
 
1490
     Use scratch_reg to hold the address of the memory location.
1491
 
1492
     The proper fix is to change PREFERRED_RELOAD_CLASS to return
1493
     NO_REGS when presented with a const_int and a register class
1494
     containing only FP registers.  Doing so unfortunately creates
1495
     more problems than it solves.   Fix this for 2.5.  */
1496
  else if (scratch_reg
1497
           && CONSTANT_P (operand1)
1498
           && fp_reg_operand (operand0, mode))
1499
    {
1500
      rtx const_mem, xoperands[2];
1501
 
1502
      /* SCRATCH_REG will hold an address and maybe the actual data.  We want
1503
         it in WORD_MODE regardless of what mode it was originally given
1504
         to us.  */
1505
      scratch_reg = force_mode (word_mode, scratch_reg);
1506
 
1507
      /* Force the constant into memory and put the address of the
1508
         memory location into scratch_reg.  */
1509
      const_mem = force_const_mem (mode, operand1);
1510
      xoperands[0] = scratch_reg;
1511
      xoperands[1] = XEXP (const_mem, 0);
1512
      emit_move_sequence (xoperands, Pmode, 0);
1513
 
1514
      /* Now load the destination register.  */
1515
      emit_insn (gen_rtx_SET (mode, operand0,
1516
                              replace_equiv_address (const_mem, scratch_reg)));
1517
      return 1;
1518
    }
1519
  /* Handle secondary reloads for SAR.  These occur when trying to load
1520
     the SAR from memory, from an FP register, or with a constant.  */
1521
  else if (scratch_reg
1522
           && GET_CODE (operand0) == REG
1523
           && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1524
           && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1525
           && (GET_CODE (operand1) == MEM
1526
               || GET_CODE (operand1) == CONST_INT
1527
               || (GET_CODE (operand1) == REG
1528
                   && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1))))))
1529
    {
1530
      /* D might not fit in 14 bits either; for such cases load D into
1531
         scratch reg.  */
1532
      if (GET_CODE (operand1) == MEM
1533
          && !memory_address_p (Pmode, XEXP (operand1, 0)))
1534
        {
1535
          /* We are reloading the address into the scratch register, so we
1536
             want to make sure the scratch register is a full register.  */
1537
          scratch_reg = force_mode (word_mode, scratch_reg);
1538
 
1539
          emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1540
          emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1541
                                                                        0)),
1542
                                                       Pmode,
1543
                                                       XEXP (XEXP (operand1, 0),
1544
                                                       0),
1545
                                                       scratch_reg));
1546
 
1547
          /* Now we are going to load the scratch register from memory;
1548
             we want to load it in the same width as the original MEM,
1549
             which must be the same as the width of the ultimate destination,
1550
             OPERAND0.  */
1551
          scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1552
 
1553
          emit_move_insn (scratch_reg,
1554
                          replace_equiv_address (operand1, scratch_reg));
1555
        }
1556
      else
1557
        {
1558
          /* We want to load the scratch register using the same mode as
1559
             the ultimate destination.  */
1560
          scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1561
 
1562
          emit_move_insn (scratch_reg, operand1);
1563
        }
1564
 
1565
      /* And emit the insn to set the ultimate destination.  We know that
1566
         the scratch register has the same mode as the destination at this
1567
         point.  */
1568
      emit_move_insn (operand0, scratch_reg);
1569
      return 1;
1570
    }
1571
  /* Handle the most common case: storing into a register.  */
1572
  else if (register_operand (operand0, mode))
1573
    {
1574
      if (register_operand (operand1, mode)
1575
          || (GET_CODE (operand1) == CONST_INT
1576
              && cint_ok_for_move (INTVAL (operand1)))
1577
          || (operand1 == CONST0_RTX (mode))
1578
          || (GET_CODE (operand1) == HIGH
1579
              && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1580
          /* Only `general_operands' can come here, so MEM is ok.  */
1581
          || GET_CODE (operand1) == MEM)
1582
        {
1583
          /* Various sets are created during RTL generation which don't
1584
             have the REG_POINTER flag correctly set.  After the CSE pass,
1585
             instruction recognition can fail if we don't consistently
1586
             set this flag when performing register copies.  This should
1587
             also improve the opportunities for creating insns that use
1588
             unscaled indexing.  */
1589
          if (REG_P (operand0) && REG_P (operand1))
1590
            {
1591
              if (REG_POINTER (operand1)
1592
                  && !REG_POINTER (operand0)
1593
                  && !HARD_REGISTER_P (operand0))
1594
                copy_reg_pointer (operand0, operand1);
1595
              else if (REG_POINTER (operand0)
1596
                       && !REG_POINTER (operand1)
1597
                       && !HARD_REGISTER_P (operand1))
1598
                copy_reg_pointer (operand1, operand0);
1599
            }
1600
 
1601
          /* When MEMs are broken out, the REG_POINTER flag doesn't
1602
             get set.  In some cases, we can set the REG_POINTER flag
1603
             from the declaration for the MEM.  */
1604
          if (REG_P (operand0)
1605
              && GET_CODE (operand1) == MEM
1606
              && !REG_POINTER (operand0))
1607
            {
1608
              tree decl = MEM_EXPR (operand1);
1609
 
1610
              /* Set the register pointer flag and register alignment
1611
                 if the declaration for this memory reference is a
1612
                 pointer type.  Fortran indirect argument references
1613
                 are ignored.  */
1614
              if (decl
1615
                  && !(flag_argument_noalias > 1
1616
                       && TREE_CODE (decl) == INDIRECT_REF
1617
                       && TREE_CODE (TREE_OPERAND (decl, 0)) == PARM_DECL))
1618
                {
1619
                  tree type;
1620
 
1621
                  /* If this is a COMPONENT_REF, use the FIELD_DECL from
1622
                     tree operand 1.  */
1623
                  if (TREE_CODE (decl) == COMPONENT_REF)
1624
                    decl = TREE_OPERAND (decl, 1);
1625
 
1626
                  type = TREE_TYPE (decl);
1627
                  if (TREE_CODE (type) == ARRAY_TYPE)
1628
                    type = get_inner_array_type (type);
1629
 
1630
                  if (POINTER_TYPE_P (type))
1631
                    {
1632
                      int align;
1633
 
1634
                      type = TREE_TYPE (type);
1635
                      /* Using TYPE_ALIGN_OK is rather conservative as
1636
                         only the Ada frontend actually sets it.  */
1637
                      align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
1638
                               : BITS_PER_UNIT);
1639
                      mark_reg_pointer (operand0, align);
1640
                    }
1641
                }
1642
            }
1643
 
1644
          emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1645
          return 1;
1646
        }
1647
    }
1648
  else if (GET_CODE (operand0) == MEM)
1649
    {
1650
      if (mode == DFmode && operand1 == CONST0_RTX (mode)
1651
          && !(reload_in_progress || reload_completed))
1652
        {
1653
          rtx temp = gen_reg_rtx (DFmode);
1654
 
1655
          emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1656
          emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
1657
          return 1;
1658
        }
1659
      if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1660
        {
1661
          /* Run this case quickly.  */
1662
          emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1663
          return 1;
1664
        }
1665
      if (! (reload_in_progress || reload_completed))
1666
        {
1667
          operands[0] = validize_mem (operand0);
1668
          operands[1] = operand1 = force_reg (mode, operand1);
1669
        }
1670
    }
1671
 
1672
  /* Simplify the source if we need to.
1673
     Note we do have to handle function labels here, even though we do
1674
     not consider them legitimate constants.  Loop optimizations can
1675
     call the emit_move_xxx routines with one as a source.  */
1676
  if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1677
      || function_label_operand (operand1, mode)
1678
      || (GET_CODE (operand1) == HIGH
1679
          && symbolic_operand (XEXP (operand1, 0), mode)))
1680
    {
1681
      int ishighonly = 0;
1682
 
1683
      if (GET_CODE (operand1) == HIGH)
1684
        {
1685
          ishighonly = 1;
1686
          operand1 = XEXP (operand1, 0);
1687
        }
1688
      if (symbolic_operand (operand1, mode))
1689
        {
1690
          /* Argh.  The assembler and linker can't handle arithmetic
1691
             involving plabels.
1692
 
1693
             So we force the plabel into memory, load operand0 from
1694
             the memory location, then add in the constant part.  */
1695
          if ((GET_CODE (operand1) == CONST
1696
               && GET_CODE (XEXP (operand1, 0)) == PLUS
1697
               && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
1698
              || function_label_operand (operand1, mode))
1699
            {
1700
              rtx temp, const_part;
1701
 
1702
              /* Figure out what (if any) scratch register to use.  */
1703
              if (reload_in_progress || reload_completed)
1704
                {
1705
                  scratch_reg = scratch_reg ? scratch_reg : operand0;
1706
                  /* SCRATCH_REG will hold an address and maybe the actual
1707
                     data.  We want it in WORD_MODE regardless of what mode it
1708
                     was originally given to us.  */
1709
                  scratch_reg = force_mode (word_mode, scratch_reg);
1710
                }
1711
              else if (flag_pic)
1712
                scratch_reg = gen_reg_rtx (Pmode);
1713
 
1714
              if (GET_CODE (operand1) == CONST)
1715
                {
1716
                  /* Save away the constant part of the expression.  */
1717
                  const_part = XEXP (XEXP (operand1, 0), 1);
1718
                  gcc_assert (GET_CODE (const_part) == CONST_INT);
1719
 
1720
                  /* Force the function label into memory.  */
1721
                  temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1722
                }
1723
              else
1724
                {
1725
                  /* No constant part.  */
1726
                  const_part = NULL_RTX;
1727
 
1728
                  /* Force the function label into memory.  */
1729
                  temp = force_const_mem (mode, operand1);
1730
                }
1731
 
1732
 
1733
              /* Get the address of the memory location.  PIC-ify it if
1734
                 necessary.  */
1735
              temp = XEXP (temp, 0);
1736
              if (flag_pic)
1737
                temp = legitimize_pic_address (temp, mode, scratch_reg);
1738
 
1739
              /* Put the address of the memory location into our destination
1740
                 register.  */
1741
              operands[1] = temp;
1742
              emit_move_sequence (operands, mode, scratch_reg);
1743
 
1744
              /* Now load from the memory location into our destination
1745
                 register.  */
1746
              operands[1] = gen_rtx_MEM (Pmode, operands[0]);
1747
              emit_move_sequence (operands, mode, scratch_reg);
1748
 
1749
              /* And add back in the constant part.  */
1750
              if (const_part != NULL_RTX)
1751
                expand_inc (operand0, const_part);
1752
 
1753
              return 1;
1754
            }
1755
 
1756
          if (flag_pic)
1757
            {
1758
              rtx temp;
1759
 
1760
              if (reload_in_progress || reload_completed)
1761
                {
1762
                  temp = scratch_reg ? scratch_reg : operand0;
1763
                  /* TEMP will hold an address and maybe the actual
1764
                     data.  We want it in WORD_MODE regardless of what mode it
1765
                     was originally given to us.  */
1766
                  temp = force_mode (word_mode, temp);
1767
                }
1768
              else
1769
                temp = gen_reg_rtx (Pmode);
1770
 
1771
              /* (const (plus (symbol) (const_int))) must be forced to
1772
                 memory during/after reload if the const_int will not fit
1773
                 in 14 bits.  */
1774
              if (GET_CODE (operand1) == CONST
1775
                  && GET_CODE (XEXP (operand1, 0)) == PLUS
1776
                  && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1777
                  && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1778
                  && (reload_completed || reload_in_progress)
1779
                  && flag_pic)
1780
                {
1781
                  rtx const_mem = force_const_mem (mode, operand1);
1782
                  operands[1] = legitimize_pic_address (XEXP (const_mem, 0),
1783
                                                        mode, temp);
1784
                  operands[1] = replace_equiv_address (const_mem, operands[1]);
1785
                  emit_move_sequence (operands, mode, temp);
1786
                }
1787
              else
1788
                {
1789
                  operands[1] = legitimize_pic_address (operand1, mode, temp);
1790
                  if (REG_P (operand0) && REG_P (operands[1]))
1791
                    copy_reg_pointer (operand0, operands[1]);
1792
                  emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
1793
                }
1794
            }
1795
          /* On the HPPA, references to data space are supposed to use dp,
1796
             register 27, but showing it in the RTL inhibits various cse
1797
             and loop optimizations.  */
1798
          else
1799
            {
1800
              rtx temp, set;
1801
 
1802
              if (reload_in_progress || reload_completed)
1803
                {
1804
                  temp = scratch_reg ? scratch_reg : operand0;
1805
                  /* TEMP will hold an address and maybe the actual
1806
                     data.  We want it in WORD_MODE regardless of what mode it
1807
                     was originally given to us.  */
1808
                  temp = force_mode (word_mode, temp);
1809
                }
1810
              else
1811
                temp = gen_reg_rtx (mode);
1812
 
1813
              /* Loading a SYMBOL_REF into a register makes that register
1814
                 safe to be used as the base in an indexed address.
1815
 
1816
                 Don't mark hard registers though.  That loses.  */
1817
              if (GET_CODE (operand0) == REG
1818
                  && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
1819
                mark_reg_pointer (operand0, BITS_PER_UNIT);
1820
              if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
1821
                mark_reg_pointer (temp, BITS_PER_UNIT);
1822
 
1823
              if (ishighonly)
1824
                set = gen_rtx_SET (mode, operand0, temp);
1825
              else
1826
                set = gen_rtx_SET (VOIDmode,
1827
                                   operand0,
1828
                                   gen_rtx_LO_SUM (mode, temp, operand1));
1829
 
1830
              emit_insn (gen_rtx_SET (VOIDmode,
1831
                                      temp,
1832
                                      gen_rtx_HIGH (mode, operand1)));
1833
              emit_insn (set);
1834
 
1835
            }
1836
          return 1;
1837
        }
1838
      else if (pa_tls_referenced_p (operand1))
1839
        {
1840
          rtx tmp = operand1;
1841
          rtx addend = NULL;
1842
 
1843
          if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
1844
            {
1845
              addend = XEXP (XEXP (tmp, 0), 1);
1846
              tmp = XEXP (XEXP (tmp, 0), 0);
1847
            }
1848
 
1849
          gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
1850
          tmp = legitimize_tls_address (tmp);
1851
          if (addend)
1852
            {
1853
              tmp = gen_rtx_PLUS (mode, tmp, addend);
1854
              tmp = force_operand (tmp, operands[0]);
1855
            }
1856
          operands[1] = tmp;
1857
        }
1858
      else if (GET_CODE (operand1) != CONST_INT
1859
               || !cint_ok_for_move (INTVAL (operand1)))
1860
        {
1861
          rtx insn, temp;
1862
          rtx op1 = operand1;
1863
          HOST_WIDE_INT value = 0;
1864
          HOST_WIDE_INT insv = 0;
1865
          int insert = 0;
1866
 
1867
          if (GET_CODE (operand1) == CONST_INT)
1868
            value = INTVAL (operand1);
1869
 
1870
          if (TARGET_64BIT
1871
              && GET_CODE (operand1) == CONST_INT
1872
              && HOST_BITS_PER_WIDE_INT > 32
1873
              && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
1874
            {
1875
              HOST_WIDE_INT nval;
1876
 
1877
              /* Extract the low order 32 bits of the value and sign extend.
1878
                 If the new value is the same as the original value, we can
1879
                 use the original value as-is.  If the new value is
1880
                 different, we use it and insert the most significant 32 bits
1881
                 of the original value into the final result.  */
1882
              nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
1883
                      ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
1884
              if (value != nval)
1885
                {
1886
#if HOST_BITS_PER_WIDE_INT > 32
1887
                  insv = value >= 0 ? value >> 32 : ~(~value >> 32);
1888
#endif
1889
                  insert = 1;
1890
                  value = nval;
1891
                  operand1 = GEN_INT (nval);
1892
                }
1893
            }
1894
 
1895
          if (reload_in_progress || reload_completed)
1896
            temp = scratch_reg ? scratch_reg : operand0;
1897
          else
1898
            temp = gen_reg_rtx (mode);
1899
 
1900
          /* We don't directly split DImode constants on 32-bit targets
1901
             because PLUS uses an 11-bit immediate and the insn sequence
1902
             generated is not as efficient as the one using HIGH/LO_SUM.  */
1903
          if (GET_CODE (operand1) == CONST_INT
1904
              && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
1905
              && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
1906
              && !insert)
1907
            {
1908
              /* Directly break constant into high and low parts.  This
1909
                 provides better optimization opportunities because various
1910
                 passes recognize constants split with PLUS but not LO_SUM.
1911
                 We use a 14-bit signed low part except when the addition
1912
                 of 0x4000 to the high part might change the sign of the
1913
                 high part.  */
1914
              HOST_WIDE_INT low = value & 0x3fff;
1915
              HOST_WIDE_INT high = value & ~ 0x3fff;
1916
 
1917
              if (low >= 0x2000)
1918
                {
1919
                  if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
1920
                    high += 0x2000;
1921
                  else
1922
                    high += 0x4000;
1923
                }
1924
 
1925
              low = value - high;
1926
 
1927
              emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
1928
              operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
1929
            }
1930
          else
1931
            {
1932
              emit_insn (gen_rtx_SET (VOIDmode, temp,
1933
                                      gen_rtx_HIGH (mode, operand1)));
1934
              operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
1935
            }
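
          /* Editor's worked example (not in the original source) for
             the PLUS split above: value = 0x123456 gives low = 0x3456,
             which is >= 0x2000, so high becomes 0x120000 + 0x4000
             = 0x124000 and low = value - high = -0xbaa.  The offset
             fits in 14 signed bits, so the constant is rebuilt as a
             HIGH of 0x124000 plus a signed 14-bit displacement.  */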
1936
 
1937
          insn = emit_move_insn (operands[0], operands[1]);
1938
 
1939
          /* Now insert the most significant 32 bits of the value
1940
             into the register.  When we don't have a second register
1941
             available, it could take up to nine instructions to load
1942
             a 64-bit integer constant.  Prior to reload, we force
1943
             constants that would take more than three instructions
1944
             to load to the constant pool.  During and after reload,
1945
             we have to handle all possible values.  */
1946
          if (insert)
1947
            {
1948
              /* Use a HIGH/LO_SUM/INSV sequence if we have a second
1949
                 register and the value to be inserted is outside the
1950
                 range that can be loaded with three depdi instructions.  */
1951
              if (temp != operand0 && (insv >= 16384 || insv < -16384))
1952
                {
1953
                  operand1 = GEN_INT (insv);
1954
 
1955
                  emit_insn (gen_rtx_SET (VOIDmode, temp,
1956
                                          gen_rtx_HIGH (mode, operand1)));
1957
                  emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
1958
                  emit_insn (gen_insv (operand0, GEN_INT (32),
1959
                                       const0_rtx, temp));
1960
                }
1961
              else
1962
                {
1963
                  int len = 5, pos = 27;
1964
 
1965
                  /* Insert the bits using the depdi instruction.  */
1966
                  while (pos >= 0)
1967
                    {
1968
                      HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
1969
                      HOST_WIDE_INT sign = v5 < 0;
1970
 
1971
                      /* Left extend the insertion.  */
1972
                      insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
1973
                      while (pos > 0 && (insv & 1) == sign)
1974
                        {
1975
                          insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
1976
                          len += 1;
1977
                          pos -= 1;
1978
                        }
1979
 
1980
                      emit_insn (gen_insv (operand0, GEN_INT (len),
1981
                                           GEN_INT (pos), GEN_INT (v5)));
1982
 
1983
                      len = pos > 0 && pos < 5 ? pos : 5;
1984
                      pos -= len;
1985
                    }
1986
                }
1987
            }
1988
 
1989
          REG_NOTES (insn)
1990
            = gen_rtx_EXPR_LIST (REG_EQUAL, op1, REG_NOTES (insn));
1991
 
1992
          return 1;
1993
        }
1994
    }
1995
  /* Now have insn-emit do whatever it normally does.  */
1996
  return 0;
1997
}
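
/* Editor's sketch (not part of pa.c): a self-contained model of the
   depdi insertion loop above, assuming a host with a 64-bit long long.
   The hypothetical sketch_depdi_fields prints the (value, pos, len)
   field triples that the loop above would hand to gen_insv, including
   the left-extension of each 5-bit field over runs of sign bits.  */

#include <stdio.h>

static void
sketch_depdi_fields (long long insv)
{
  int len = 5, pos = 27;

  while (pos >= 0)
    {
      /* Sign extend the low 5 bits of the remaining value.  */
      long long v5 = ((insv & 31) ^ 16) - 16;
      long long sign = v5 < 0;

      /* Shift off the bits just consumed, then left extend the
         insertion while the next bits repeat the sign.  */
      insv = insv >= 0 ? insv >> len : ~(~insv >> len);
      while (pos > 0 && (insv & 1) == sign)
        {
          insv = insv >= 0 ? insv >> 1 : ~(~insv >> 1);
          len += 1;
          pos -= 1;
        }

      printf ("depdi %lld,%d,%d\n", v5, pos, len);

      len = pos > 0 && pos < 5 ? pos : 5;
      pos -= len;
    }
}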
1998
 
1999
/* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2000
   it will need a link/runtime reloc).  */
2001
 
2002
int
2003
reloc_needed (tree exp)
2004
{
2005
  int reloc = 0;
2006
 
2007
  switch (TREE_CODE (exp))
2008
    {
2009
    case ADDR_EXPR:
2010
      return 1;
2011
 
2012
    case PLUS_EXPR:
2013
    case MINUS_EXPR:
2014
      reloc = reloc_needed (TREE_OPERAND (exp, 0));
2015
      reloc |= reloc_needed (TREE_OPERAND (exp, 1));
2016
      break;
2017
 
2018
    case NOP_EXPR:
2019
    case CONVERT_EXPR:
2020
    case NON_LVALUE_EXPR:
2021
      reloc = reloc_needed (TREE_OPERAND (exp, 0));
2022
      break;
2023
 
2024
    case CONSTRUCTOR:
2025
      {
2026
        tree value;
2027
        unsigned HOST_WIDE_INT ix;
2028
 
2029
        FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2030
          if (value)
2031
            reloc |= reloc_needed (value);
2032
      }
2033
      break;
2034
 
2035
    case ERROR_MARK:
2036
      break;
2037
 
2038
    default:
2039
      break;
2040
    }
2041
  return reloc;
2042
}
2043
 
2044
/* Does operand (which is a symbolic_operand) live in text space?
2045
   If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
2046
   will be true.  */
2047
 
2048
int
2049
read_only_operand (rtx operand, enum machine_mode mode ATTRIBUTE_UNUSED)
2050
{
2051
  if (GET_CODE (operand) == CONST)
2052
    operand = XEXP (XEXP (operand, 0), 0);
2053
  if (flag_pic)
2054
    {
2055
      if (GET_CODE (operand) == SYMBOL_REF)
2056
        return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
2057
    }
2058
  else
2059
    {
2060
      if (GET_CODE (operand) == SYMBOL_REF)
2061
        return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
2062
    }
2063
  return 1;
2064
}
2065
 
2066
 
2067
/* Return the best assembler insn template
2068
   for moving operands[1] into operands[0] as a fullword.  */
2069
const char *
2070
singlemove_string (rtx *operands)
2071
{
2072
  HOST_WIDE_INT intval;
2073
 
2074
  if (GET_CODE (operands[0]) == MEM)
2075
    return "stw %r1,%0";
2076
  if (GET_CODE (operands[1]) == MEM)
2077
    return "ldw %1,%0";
2078
  if (GET_CODE (operands[1]) == CONST_DOUBLE)
2079
    {
2080
      long i;
2081
      REAL_VALUE_TYPE d;
2082
 
2083
      gcc_assert (GET_MODE (operands[1]) == SFmode);
2084
 
2085
      /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2086
         bit pattern.  */
2087
      REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2088
      REAL_VALUE_TO_TARGET_SINGLE (d, i);
2089
 
2090
      operands[1] = GEN_INT (i);
2091
      /* Fall through to CONST_INT case.  */
2092
    }
2093
  if (GET_CODE (operands[1]) == CONST_INT)
2094
    {
2095
      intval = INTVAL (operands[1]);
2096
 
2097
      if (VAL_14_BITS_P (intval))
2098
        return "ldi %1,%0";
2099
      else if ((intval & 0x7ff) == 0)
2100
        return "ldil L'%1,%0";
2101
      else if (zdepi_cint_p (intval))
2102
        return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2103
      else
2104
        return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2105
    }
2106
  return "copy %1,%0";
2107
}
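
/* Editor's note (illustrative, not in the original source): the cases
   above map constants to templates as follows.  42 fits in 14 bits
   and becomes "ldi 42,%0"; 0x12345800 has its low 11 bits clear and
   becomes "ldil L'0x12345800,%0"; a shifted run of ones such as
   0x1ffe0 satisfies zdepi_cint_p and uses the depwi,z form; anything
   else needs the two-insn ldil/ldo sequence.  */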
2108
 
2109
 
2110
/* Compute position (in OP[1]) and width (in OP[2])
2111
   useful for copying IMM to a register using the zdepi
2112
   instructions.  Store the immediate value to insert in OP[0].  */
2113
static void
2114
compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2115
{
2116
  int lsb, len;
2117
 
2118
  /* Find the least significant set bit in IMM.  */
2119
  for (lsb = 0; lsb < 32; lsb++)
2120
    {
2121
      if ((imm & 1) != 0)
2122
        break;
2123
      imm >>= 1;
2124
    }
2125
 
2126
  /* Choose variants based on *sign* of the 5-bit field.  */
2127
  if ((imm & 0x10) == 0)
2128
    len = (lsb <= 28) ? 4 : 32 - lsb;
2129
  else
2130
    {
2131
      /* Find the width of the bitstring in IMM.  */
2132
      for (len = 5; len < 32; len++)
2133
        {
2134
          if ((imm & (1 << len)) == 0)
2135
            break;
2136
        }
2137
 
2138
      /* Sign extend IMM as a 5-bit value.  */
2139
      imm = (imm & 0xf) - 0x10;
2140
    }
2141
 
2142
  op[0] = imm;
2143
  op[1] = 31 - lsb;
2144
  op[2] = len;
2145
}
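
/* Editor's worked example (not in the original source): for
   IMM = 0x00ff0000 the scan finds lsb = 16 and shifts IMM down to
   0xff.  Bit 4 of the remaining field is set, so the width scan
   gives len = 8 and the 5-bit sign extension yields -1; the result
   is op = { -1, 15, 8 }, and "depwi,z -1,15,8" rebuilds
   0x00ff0000.  */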
2146
 
2147
/* Compute position (in OP[1]) and width (in OP[2])
2148
   useful for copying IMM to a register using the depdi,z
2149
   instructions.  Store the immediate value to insert in OP[0].  */
2150
void
2151
compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2152
{
2153
  HOST_WIDE_INT lsb, len;
2154
 
2155
  /* Find the least significant set bit in IMM.  */
2156
  for (lsb = 0; lsb < HOST_BITS_PER_WIDE_INT; lsb++)
2157
    {
2158
      if ((imm & 1) != 0)
2159
        break;
2160
      imm >>= 1;
2161
    }
2162
 
2163
  /* Choose variants based on *sign* of the 5-bit field.  */
2164
  if ((imm & 0x10) == 0)
2165
    len = ((lsb <= HOST_BITS_PER_WIDE_INT - 4)
2166
           ? 4 : HOST_BITS_PER_WIDE_INT - lsb);
2167
  else
2168
    {
2169
      /* Find the width of the bitstring in IMM.  */
2170
      for (len = 5; len < HOST_BITS_PER_WIDE_INT; len++)
2171
        {
2172
          if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2173
            break;
2174
        }
2175
 
2176
      /* Sign extend IMM as a 5-bit value.  */
2177
      imm = (imm & 0xf) - 0x10;
2178
    }
2179
 
2180
  op[0] = imm;
2181
  op[1] = 63 - lsb;
2182
  op[2] = len;
2183
}
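
/* Editor's worked example (not in the original source): for
   IMM = 0x3c the scan finds lsb = 2 and shifts IMM down to 0xf.
   Bit 4 of the field is clear, so len = 4 and the immediate is
   kept as-is, giving op = { 0xf, 61, 4 } for a single depdi,z.  */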
2184
 
2185
/* Output assembler code to perform a doubleword move insn
2186
   with operands OPERANDS.  */
2187
 
2188
const char *
2189
output_move_double (rtx *operands)
2190
{
2191
  enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2192
  rtx latehalf[2];
2193
  rtx addreg0 = 0, addreg1 = 0;
2194
 
2195
  /* First classify both operands.  */
2196
 
2197
  if (REG_P (operands[0]))
2198
    optype0 = REGOP;
2199
  else if (offsettable_memref_p (operands[0]))
2200
    optype0 = OFFSOP;
2201
  else if (GET_CODE (operands[0]) == MEM)
2202
    optype0 = MEMOP;
2203
  else
2204
    optype0 = RNDOP;
2205
 
2206
  if (REG_P (operands[1]))
2207
    optype1 = REGOP;
2208
  else if (CONSTANT_P (operands[1]))
2209
    optype1 = CNSTOP;
2210
  else if (offsettable_memref_p (operands[1]))
2211
    optype1 = OFFSOP;
2212
  else if (GET_CODE (operands[1]) == MEM)
2213
    optype1 = MEMOP;
2214
  else
2215
    optype1 = RNDOP;
2216
 
2217
  /* Check for the cases that the operand constraints are not
2218
     supposed to allow.  */
2219
  gcc_assert (optype0 == REGOP || optype1 == REGOP);
2220
 
2221
  /* Handle copies between general and floating registers.  */
2222
 
2223
  if (optype0 == REGOP && optype1 == REGOP
2224
      && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2225
    {
2226
      if (FP_REG_P (operands[0]))
2227
        {
2228
          output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2229
          output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2230
          return "{fldds|fldd} -16(%%sp),%0";
2231
        }
2232
      else
2233
        {
2234
          output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2235
          output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2236
          return "{ldws|ldw} -12(%%sp),%R0";
2237
        }
2238
    }
2239
 
2240
   /* Handle auto decrementing and incrementing loads and stores
2241
     specifically, since the structure of the function doesn't work
2242
     for them without major modification.  Do it better when we teach
2243
     this port about the general inc/dec addressing of the PA.
2244
     (This was written by tege.  Chide him if it doesn't work.)  */
2245
 
2246
  if (optype0 == MEMOP)
2247
    {
2248
      /* We have to output the address syntax ourselves, since print_operand
2249
         doesn't deal with the addresses we want to use.  Fix this later.  */
2250
 
2251
      rtx addr = XEXP (operands[0], 0);
2252
      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2253
        {
2254
          rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2255
 
2256
          operands[0] = XEXP (addr, 0);
2257
          gcc_assert (GET_CODE (operands[1]) == REG
2258
                      && GET_CODE (operands[0]) == REG);
2259
 
2260
          gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2261
 
2262
          /* No overlap between high target register and address
2263
             register.  (We do this in a non-obvious way to
2264
             save a register file writeback)  */
2265
          if (GET_CODE (addr) == POST_INC)
2266
            return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2267
          return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2268
        }
2269
      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2270
        {
2271
          rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2272
 
2273
          operands[0] = XEXP (addr, 0);
2274
          gcc_assert (GET_CODE (operands[1]) == REG
2275
                      && GET_CODE (operands[0]) == REG);
2276
 
2277
          gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2278
          /* No overlap between high target register and address
2279
             register.  (We do this in a non-obvious way to save a
2280
             register file writeback)  */
2281
          if (GET_CODE (addr) == PRE_INC)
2282
            return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2283
          return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2284
        }
2285
    }
2286
  if (optype1 == MEMOP)
2287
    {
2288
      /* We have to output the address syntax ourselves, since print_operand
2289
         doesn't deal with the addresses we want to use.  Fix this later.  */
2290
 
2291
      rtx addr = XEXP (operands[1], 0);
2292
      if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2293
        {
2294
          rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2295
 
2296
          operands[1] = XEXP (addr, 0);
2297
          gcc_assert (GET_CODE (operands[0]) == REG
2298
                      && GET_CODE (operands[1]) == REG);
2299
 
2300
          if (!reg_overlap_mentioned_p (high_reg, addr))
2301
            {
2302
              /* No overlap between high target register and address
2303
                 register.  (We do this in a non-obvious way to
2304
                 save a register file writeback)  */
2305
              if (GET_CODE (addr) == POST_INC)
2306
                return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2307
              return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2308
            }
2309
          else
2310
            {
2311
              /* This is an undefined situation.  We should load into the
2312
                 address register *and* update that register.  Probably
2313
                 we don't need to handle this at all.  */
2314
              if (GET_CODE (addr) == POST_INC)
2315
                return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2316
              return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2317
            }
2318
        }
2319
      else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2320
        {
2321
          rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2322
 
2323
          operands[1] = XEXP (addr, 0);
2324
          gcc_assert (GET_CODE (operands[0]) == REG
2325
                      && GET_CODE (operands[1]) == REG);
2326
 
2327
          if (!reg_overlap_mentioned_p (high_reg, addr))
2328
            {
2329
              /* No overlap between high target register and address
2330
                 register.  (We do this in a non-obvious way to
2331
                 save a register file writeback)  */
2332
              if (GET_CODE (addr) == PRE_INC)
2333
                return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2334
              return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2335
            }
2336
          else
2337
            {
2338
              /* This is an undefined situation.  We should load into the
2339
                 address register *and* update that register.  Probably
2340
                 we don't need to handle this at all.  */
2341
              if (GET_CODE (addr) == PRE_INC)
2342
                return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2343
              return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2344
            }
2345
        }
2346
      else if (GET_CODE (addr) == PLUS
2347
               && GET_CODE (XEXP (addr, 0)) == MULT)
2348
        {
2349
          rtx xoperands[4];
2350
          rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2351
 
2352
          if (!reg_overlap_mentioned_p (high_reg, addr))
2353
            {
2354
              xoperands[0] = high_reg;
2355
              xoperands[1] = XEXP (addr, 1);
2356
              xoperands[2] = XEXP (XEXP (addr, 0), 0);
2357
              xoperands[3] = XEXP (XEXP (addr, 0), 1);
2358
              output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2359
                               xoperands);
2360
              return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2361
            }
2362
          else
2363
            {
2364
              xoperands[0] = high_reg;
2365
              xoperands[1] = XEXP (addr, 1);
2366
              xoperands[2] = XEXP (XEXP (addr, 0), 0);
2367
              xoperands[3] = XEXP (XEXP (addr, 0), 1);
2368
              output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2369
                               xoperands);
2370
              return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2371
            }
2372
        }
2373
    }
2374
 
2375
  /* If an operand is an unoffsettable memory ref, find a register
2376
     we can increment temporarily to make it refer to the second word.  */
2377
 
2378
  if (optype0 == MEMOP)
2379
    addreg0 = find_addr_reg (XEXP (operands[0], 0));
2380
 
2381
  if (optype1 == MEMOP)
2382
    addreg1 = find_addr_reg (XEXP (operands[1], 0));
2383
 
2384
  /* Ok, we can do one word at a time.
2385
     Normally we do the low-numbered word first.
2386
 
2387
     In either case, set up in LATEHALF the operands to use
2388
     for the high-numbered word and in some cases alter the
2389
     operands in OPERANDS to be suitable for the low-numbered word.  */
2390
 
2391
  if (optype0 == REGOP)
2392
    latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2393
  else if (optype0 == OFFSOP)
2394
    latehalf[0] = adjust_address (operands[0], SImode, 4);
2395
  else
2396
    latehalf[0] = operands[0];
2397
 
2398
  if (optype1 == REGOP)
2399
    latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2400
  else if (optype1 == OFFSOP)
2401
    latehalf[1] = adjust_address (operands[1], SImode, 4);
2402
  else if (optype1 == CNSTOP)
2403
    split_double (operands[1], &operands[1], &latehalf[1]);
2404
  else
2405
    latehalf[1] = operands[1];
2406
 
2407
  /* If the first move would clobber the source of the second one,
2408
     do them in the other order.
2409
 
2410
     This can happen in two cases:
2411
 
2412
        mem -> register where the first half of the destination register
2413
        is the same register used in the memory's address.  Reload
2414
        can create such insns.
2415
 
2416
        mem in this case will be either register indirect or register
2417
        indirect plus a valid offset.
2418
 
2419
        register -> register move where REGNO(dst) == REGNO(src + 1)
2420
        someone (Tim/Tege?) claimed this can happen for parameter loads.
2421
 
2422
     Handle mem -> register case first.  */
2423
  if (optype0 == REGOP
2424
      && (optype1 == MEMOP || optype1 == OFFSOP)
2425
      && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2426
                            operands[1], 0))
2427
    {
2428
      /* Do the late half first.  */
2429
      if (addreg1)
2430
        output_asm_insn ("ldo 4(%0),%0", &addreg1);
2431
      output_asm_insn (singlemove_string (latehalf), latehalf);
2432
 
2433
      /* Then clobber.  */
2434
      if (addreg1)
2435
        output_asm_insn ("ldo -4(%0),%0", &addreg1);
2436
      return singlemove_string (operands);
2437
    }
2438
 
2439
  /* Now handle register -> register case.  */
2440
  if (optype0 == REGOP && optype1 == REGOP
2441
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2442
    {
2443
      output_asm_insn (singlemove_string (latehalf), latehalf);
2444
      return singlemove_string (operands);
2445
    }
2446
 
2447
  /* Normal case: do the two words, low-numbered first.  */
2448
 
2449
  output_asm_insn (singlemove_string (operands), operands);
2450
 
2451
  /* Make any unoffsettable addresses point at high-numbered word.  */
2452
  if (addreg0)
2453
    output_asm_insn ("ldo 4(%0),%0", &addreg0);
2454
  if (addreg1)
2455
    output_asm_insn ("ldo 4(%0),%0", &addreg1);
2456
 
2457
  /* Do that word.  */
2458
  output_asm_insn (singlemove_string (latehalf), latehalf);
2459
 
2460
  /* Undo the adds we just did.  */
2461
  if (addreg0)
2462
    output_asm_insn ("ldo -4(%0),%0", &addreg0);
2463
  if (addreg1)
2464
    output_asm_insn ("ldo -4(%0),%0", &addreg1);
2465
 
2466
  return "";
2467
}
2468
 
2469
const char *
2470
output_fp_move_double (rtx *operands)
2471
{
2472
  if (FP_REG_P (operands[0]))
2473
    {
2474
      if (FP_REG_P (operands[1])
2475
          || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2476
        output_asm_insn ("fcpy,dbl %f1,%0", operands);
2477
      else
2478
        output_asm_insn ("fldd%F1 %1,%0", operands);
2479
    }
2480
  else if (FP_REG_P (operands[1]))
2481
    {
2482
      output_asm_insn ("fstd%F0 %1,%0", operands);
2483
    }
2484
  else
2485
    {
2486
      rtx xoperands[2];
2487
 
2488
      gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2489
 
2490
      /* This is a pain.  You have to be prepared to deal with an
2491
         arbitrary address here including pre/post increment/decrement.
2492
 
2493
         So avoid this in the MD.  */
2494
      gcc_assert (GET_CODE (operands[0]) == REG);
2495
 
2496
      xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2497
      xoperands[0] = operands[0];
2498
      output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2499
    }
2500
  return "";
2501
}
2502
 
2503
/* Return a REG that occurs in ADDR with coefficient 1.
2504
   ADDR can be effectively incremented by incrementing REG.  */
2505
 
2506
static rtx
2507
find_addr_reg (rtx addr)
2508
{
2509
  while (GET_CODE (addr) == PLUS)
2510
    {
2511
      if (GET_CODE (XEXP (addr, 0)) == REG)
2512
        addr = XEXP (addr, 0);
2513
      else if (GET_CODE (XEXP (addr, 1)) == REG)
2514
        addr = XEXP (addr, 1);
2515
      else if (CONSTANT_P (XEXP (addr, 0)))
2516
        addr = XEXP (addr, 1);
2517
      else if (CONSTANT_P (XEXP (addr, 1)))
2518
        addr = XEXP (addr, 0);
2519
      else
2520
        gcc_unreachable ();
2521
    }
2522
  gcc_assert (GET_CODE (addr) == REG);
2523
  return addr;
2524
}
2525
 
2526
/* Emit code to perform a block move.
2527
 
2528
   OPERANDS[0] is the destination pointer as a REG, clobbered.
2529
   OPERANDS[1] is the source pointer as a REG, clobbered.
2530
   OPERANDS[2] is a register for temporary storage.
2531
   OPERANDS[3] is a register for temporary storage.
2532
   OPERANDS[4] is the size as a CONST_INT
2533
   OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2534
   OPERANDS[6] is another temporary register.  */
2535
 
2536
const char *
2537
output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2538
{
2539
  int align = INTVAL (operands[5]);
2540
  unsigned long n_bytes = INTVAL (operands[4]);
2541
 
2542
  /* We can't move more than a word at a time because the PA
2543
     has no integer move insns longer than a word.  (Could use fp mem ops?)  */
2544
  if (align > (TARGET_64BIT ? 8 : 4))
2545
    align = (TARGET_64BIT ? 8 : 4);
2546
 
2547
  /* Note that we know each loop below will execute at least twice
2548
     (else we would have open-coded the copy).  */
2549
  switch (align)
2550
    {
2551
      case 8:
2552
        /* Pre-adjust the loop counter.  */
2553
        operands[4] = GEN_INT (n_bytes - 16);
2554
        output_asm_insn ("ldi %4,%2", operands);
2555
 
2556
        /* Copying loop.  */
2557
        output_asm_insn ("ldd,ma 8(%1),%3", operands);
2558
        output_asm_insn ("ldd,ma 8(%1),%6", operands);
2559
        output_asm_insn ("std,ma %3,8(%0)", operands);
2560
        output_asm_insn ("addib,>= -16,%2,.-12", operands);
2561
        output_asm_insn ("std,ma %6,8(%0)", operands);
2562
 
2563
        /* Handle the residual.  There could be up to 15 bytes of
2564
           residual to copy!  */
2565
        if (n_bytes % 16 != 0)
2566
          {
2567
            operands[4] = GEN_INT (n_bytes % 8);
2568
            if (n_bytes % 16 >= 8)
2569
              output_asm_insn ("ldd,ma 8(%1),%3", operands);
2570
            if (n_bytes % 8 != 0)
2571
              output_asm_insn ("ldd 0(%1),%6", operands);
2572
            if (n_bytes % 16 >= 8)
2573
              output_asm_insn ("std,ma %3,8(%0)", operands);
2574
            if (n_bytes % 8 != 0)
2575
              output_asm_insn ("stdby,e %6,%4(%0)", operands);
2576
          }
2577
        return "";
2578
 
2579
      case 4:
2580
        /* Pre-adjust the loop counter.  */
2581
        operands[4] = GEN_INT (n_bytes - 8);
2582
        output_asm_insn ("ldi %4,%2", operands);
2583
 
2584
        /* Copying loop.  */
2585
        output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2586
        output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2587
        output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2588
        output_asm_insn ("addib,>= -8,%2,.-12", operands);
2589
        output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2590
 
2591
        /* Handle the residual.  There could be up to 7 bytes of
2592
           residual to copy!  */
2593
        if (n_bytes % 8 != 0)
2594
          {
2595
            operands[4] = GEN_INT (n_bytes % 4);
2596
            if (n_bytes % 8 >= 4)
2597
              output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2598
            if (n_bytes % 4 != 0)
2599
              output_asm_insn ("ldw 0(%1),%6", operands);
2600
            if (n_bytes % 8 >= 4)
2601
              output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2602
            if (n_bytes % 4 != 0)
2603
              output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2604
          }
2605
        return "";
2606
 
2607
      case 2:
2608
        /* Pre-adjust the loop counter.  */
2609
        operands[4] = GEN_INT (n_bytes - 4);
2610
        output_asm_insn ("ldi %4,%2", operands);
2611
 
2612
        /* Copying loop.  */
2613
        output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2614
        output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2615
        output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2616
        output_asm_insn ("addib,>= -4,%2,.-12", operands);
2617
        output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2618
 
2619
        /* Handle the residual.  */
2620
        if (n_bytes % 4 != 0)
2621
          {
2622
            if (n_bytes % 4 >= 2)
2623
              output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2624
            if (n_bytes % 2 != 0)
2625
              output_asm_insn ("ldb 0(%1),%6", operands);
2626
            if (n_bytes % 4 >= 2)
2627
              output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2628
            if (n_bytes % 2 != 0)
2629
              output_asm_insn ("stb %6,0(%0)", operands);
2630
          }
2631
        return "";
2632
 
2633
      case 1:
2634
        /* Pre-adjust the loop counter.  */
2635
        operands[4] = GEN_INT (n_bytes - 2);
2636
        output_asm_insn ("ldi %4,%2", operands);
2637
 
2638
        /* Copying loop.  */
2639
        output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2640
        output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2641
        output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2642
        output_asm_insn ("addib,>= -2,%2,.-12", operands);
2643
        output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2644
 
2645
        /* Handle the residual.  */
2646
        if (n_bytes % 2 != 0)
2647
          {
2648
            output_asm_insn ("ldb 0(%1),%3", operands);
2649
            output_asm_insn ("stb %3,0(%0)", operands);
2650
          }
2651
        return "";
2652
 
2653
      default:
2654
        gcc_unreachable ();
2655
    }
2656
}
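
/* Editor's worked example (not in the original source) for the
   align == 4 case: n_bytes = 19 pre-loads the loop counter with 11,
   the loop copies two words per iteration (16 bytes over two
   passes), and the 3-byte residual is finished with "ldw 0(%1),%6"
   followed by "{stbys|stby},e %6,3(%0)".  */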
2657
 
2658
/* Count the number of insns necessary to handle this block move.
2659
 
2660
   Basic structure is the same as output_block_move, except that we
2661
   count insns rather than emit them.  */
2662
 
2663
static int
2664
compute_movmem_length (rtx insn)
2665
{
2666
  rtx pat = PATTERN (insn);
2667
  unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2668
  unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2669
  unsigned int n_insns = 0;
2670
 
2671
  /* We can't move more than a word at a time because the PA
2672
     has no integer move insns longer than a word.  (Could use fp mem ops?)  */
2673
  if (align > (TARGET_64BIT ? 8 : 4))
2674
    align = (TARGET_64BIT ? 8 : 4);
2675
 
2676
  /* The basic copying loop.  */
2677
  n_insns = 6;
2678
 
2679
  /* Residuals.  */
2680
  if (n_bytes % (2 * align) != 0)
2681
    {
2682
      if ((n_bytes % (2 * align)) >= align)
2683
        n_insns += 2;
2684
 
2685
      if ((n_bytes % align) != 0)
2686
        n_insns += 2;
2687
    }
2688
 
2689
  /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
2690
  return n_insns * 4;
2691
}
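
/* Editor's worked example (not in the original source): with
   align = 4 and n_bytes = 19, the copying loop costs 6 insns; the
   residual 19 % 8 = 3 is nonzero but smaller than the alignment, so
   only the partial-word pair is added, giving (6 + 2) * 4 = 32
   bytes of insns.  */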
2692
 
2693
/* Emit code to perform a block clear.
2694
 
2695
   OPERANDS[0] is the destination pointer as a REG, clobbered.
2696
   OPERANDS[1] is a register for temporary storage.
2697
   OPERANDS[2] is the size as a CONST_INT
2698
   OPERANDS[3] is the alignment safe to use, as a CONST_INT.  */
2699
 
2700
const char *
2701
output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2702
{
2703
  int align = INTVAL (operands[3]);
2704
  unsigned long n_bytes = INTVAL (operands[2]);
2705
 
2706
  /* We can't clear more than a word at a time because the PA
2707
     has no integer move insns longer than a word.  */
2708
  if (align > (TARGET_64BIT ? 8 : 4))
2709
    align = (TARGET_64BIT ? 8 : 4);
2710
 
2711
  /* Note that we know each loop below will execute at least twice
2712
     (else we would have open-coded the clear).  */
2713
  switch (align)
2714
    {
2715
      case 8:
2716
        /* Pre-adjust the loop counter.  */
2717
        operands[2] = GEN_INT (n_bytes - 16);
2718
        output_asm_insn ("ldi %2,%1", operands);
2719
 
2720
        /* Loop.  */
2721
        output_asm_insn ("std,ma %%r0,8(%0)", operands);
2722
        output_asm_insn ("addib,>= -16,%1,.-4", operands);
2723
        output_asm_insn ("std,ma %%r0,8(%0)", operands);
2724
 
2725
        /* Handle the residual.  There could be up to 15 bytes of
2726
           residual to clear!  */
2727
        if (n_bytes % 16 != 0)
2728
          {
2729
            operands[2] = GEN_INT (n_bytes % 8);
2730
            if (n_bytes % 16 >= 8)
2731
              output_asm_insn ("std,ma %%r0,8(%0)", operands);
2732
            if (n_bytes % 8 != 0)
2733
              output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
2734
          }
2735
        return "";
2736
 
2737
      case 4:
2738
        /* Pre-adjust the loop counter.  */
2739
        operands[2] = GEN_INT (n_bytes - 8);
2740
        output_asm_insn ("ldi %2,%1", operands);
2741
 
2742
        /* Loop.  */
2743
        output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2744
        output_asm_insn ("addib,>= -8,%1,.-4", operands);
2745
        output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2746
 
2747
        /* Handle the residual.  There could be up to 7 bytes of
2748
           residual to clear!  */
2749
        if (n_bytes % 8 != 0)
2750
          {
2751
            operands[2] = GEN_INT (n_bytes % 4);
2752
            if (n_bytes % 8 >= 4)
2753
              output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2754
            if (n_bytes % 4 != 0)
2755
              output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
2756
          }
2757
        return "";
2758
 
2759
      case 2:
2760
        /* Pre-adjust the loop counter.  */
2761
        operands[2] = GEN_INT (n_bytes - 4);
2762
        output_asm_insn ("ldi %2,%1", operands);
2763
 
2764
        /* Loop.  */
2765
        output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2766
        output_asm_insn ("addib,>= -4,%1,.-4", operands);
2767
        output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2768
 
2769
        /* Handle the residual.  */
2770
        if (n_bytes % 4 != 0)
2771
          {
2772
            if (n_bytes % 4 >= 2)
2773
              output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2774
            if (n_bytes % 2 != 0)
2775
              output_asm_insn ("stb %%r0,0(%0)", operands);
2776
          }
2777
        return "";
2778
 
2779
      case 1:
2780
        /* Pre-adjust the loop counter.  */
2781
        operands[2] = GEN_INT (n_bytes - 2);
2782
        output_asm_insn ("ldi %2,%1", operands);
2783
 
2784
        /* Loop.  */
2785
        output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2786
        output_asm_insn ("addib,>= -2,%1,.-4", operands);
2787
        output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2788
 
2789
        /* Handle the residual.  */
2790
        if (n_bytes % 2 != 0)
2791
          output_asm_insn ("stb %%r0,0(%0)", operands);
2792
 
2793
        return "";
2794
 
2795
      default:
2796
        gcc_unreachable ();
2797
    }
2798
}
2799
 
2800
/* Count the number of insns necessary to handle this block clear.
2801

2802
   Basic structure is the same as output_block_clear, except that we
2803
   count insns rather than emit them.  */
2804
 
2805
static int
2806
compute_clrmem_length (rtx insn)
2807
{
2808
  rtx pat = PATTERN (insn);
2809
  unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
2810
  unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
2811
  unsigned int n_insns = 0;
2812
 
2813
  /* We can't clear more than a word at a time because the PA
2814
     has no integer move insns longer than a word.  */
2815
  if (align > (TARGET_64BIT ? 8 : 4))
2816
    align = (TARGET_64BIT ? 8 : 4);
2817
 
2818
  /* The basic loop.  */
2819
  n_insns = 4;
2820
 
2821
  /* Residuals.  */
2822
  if (n_bytes % (2 * align) != 0)
2823
    {
2824
      if ((n_bytes % (2 * align)) >= align)
2825
        n_insns++;
2826
 
2827
      if ((n_bytes % align) != 0)
2828
        n_insns++;
2829
    }
2830
 
2831
  /* Lengths are expressed in bytes now; each insn is 4 bytes.  */
2832
  return n_insns * 4;
2833
}
2834
 
2835
 
2836
const char *
2837
output_and (rtx *operands)
2838
{
2839
  if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2840
    {
2841
      unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2842
      int ls0, ls1, ms0, p, len;
2843
 
2844
      for (ls0 = 0; ls0 < 32; ls0++)
2845
        if ((mask & (1 << ls0)) == 0)
2846
          break;
2847
 
2848
      for (ls1 = ls0; ls1 < 32; ls1++)
2849
        if ((mask & (1 << ls1)) != 0)
2850
          break;
2851
 
2852
      for (ms0 = ls1; ms0 < 32; ms0++)
2853
        if ((mask & (1 << ms0)) == 0)
2854
          break;
2855
 
2856
      gcc_assert (ms0 == 32);
2857
 
2858
      if (ls1 == 32)
2859
        {
2860
          len = ls0;
2861
 
2862
          gcc_assert (len);
2863
 
2864
          operands[2] = GEN_INT (len);
2865
          return "{extru|extrw,u} %1,31,%2,%0";
2866
        }
2867
      else
2868
        {
2869
          /* We could use this `depi' for the case above as well, but `depi'
2870
             requires one more register file access than an `extru'.  */
2871
 
2872
          p = 31 - ls0;
2873
          len = ls1 - ls0;
2874
 
2875
          operands[2] = GEN_INT (p);
2876
          operands[3] = GEN_INT (len);
2877
          return "{depi|depwi} 0,%2,%3,%0";
2878
        }
2879
    }
2880
  else
2881
    return "and %1,%2,%0";
2882
}
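
/* Editor's sketch (not part of pa.c): a self-contained model of the
   mask analysis in output_and above.  The hypothetical
   sketch_and_mask classifies a 32-bit AND mask the same way the
   ls0/ls1/ms0 scan does: a mask of low ones maps to an extrw,u-style
   extract, a mask with one interior hole maps to a depwi-style
   deposit of zeros, and anything else would not have matched the
   operand predicate that guards output_and.  */

#include <stdio.h>

static void
sketch_and_mask (unsigned int mask)
{
  int ls0, ls1, ms0;

  for (ls0 = 0; ls0 < 32; ls0++)        /* lowest clear bit */
    if ((mask & (1u << ls0)) == 0)
      break;

  for (ls1 = ls0; ls1 < 32; ls1++)      /* next set bit above it */
    if ((mask & (1u << ls1)) != 0)
      break;

  for (ms0 = ls1; ms0 < 32; ms0++)      /* bits must stay set to the top */
    if ((mask & (1u << ms0)) == 0)
      break;

  if (ms0 != 32)
    printf ("mask 0x%08x: not a single field\n", mask);
  else if (ls1 == 32)
    printf ("mask 0x%08x: extrw,u %%1,31,%d,%%0\n", mask, ls0);
  else
    printf ("mask 0x%08x: depwi 0,%d,%d,%%0\n", mask, 31 - ls0, ls1 - ls0);
}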
2883
 
2884
/* Return a string to perform a bitwise-and of operands[1] with operands[2]
2885
   storing the result in operands[0].  */
2886
const char *
2887
output_64bit_and (rtx *operands)
2888
{
2889
  if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
2890
    {
2891
      unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
2892
      int ls0, ls1, ms0, p, len;
2893
 
2894
      for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
2895
        if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
2896
          break;
2897
 
2898
      for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
2899
        if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
2900
          break;
2901
 
2902
      for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
2903
        if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
2904
          break;
2905
 
2906
      gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
2907
 
2908
      if (ls1 == HOST_BITS_PER_WIDE_INT)
2909
        {
2910
          len = ls0;
2911
 
2912
          gcc_assert (len);
2913
 
2914
          operands[2] = GEN_INT (len);
2915
          return "extrd,u %1,63,%2,%0";
2916
        }
2917
      else
2918
        {
2919
          /* We could use this `depdi' for the case above as well, but `depdi'
2920
             requires one more register file access than an `extrd,u'.  */
2921
 
2922
          p = 63 - ls0;
2923
          len = ls1 - ls0;
2924
 
2925
          operands[2] = GEN_INT (p);
2926
          operands[3] = GEN_INT (len);
2927
          return "depdi 0,%2,%3,%0";
2928
        }
2929
    }
2930
  else
2931
    return "and %1,%2,%0";
2932
}

const char *
output_ior (rtx *operands)
{
  unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
  int bs0, bs1, p, len;

  if (INTVAL (operands[2]) == 0)
    return "copy %1,%0";

  for (bs0 = 0; bs0 < 32; bs0++)
    if ((mask & (1 << bs0)) != 0)
      break;

  for (bs1 = bs0; bs1 < 32; bs1++)
    if ((mask & (1 << bs1)) == 0)
      break;

  gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);

  p = 31 - bs0;
  len = bs1 - bs0;

  operands[2] = GEN_INT (p);
  operands[3] = GEN_INT (len);
  return "{depi|depwi} -1,%2,%3,%0";
}
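
/* Worked example: for mask 0x00000ff0 (a run of ones at bits 4-11),
   bs0 = 4 and bs1 = 12, so p = 27 and len = 8, and the template
   emitted is "{depi|depwi} -1,27,8,%0", depositing all-ones into
   that field.  */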

/* Return a string to perform a bitwise-inclusive-or of operands[1] with
   operands[2] storing the result in operands[0].  */
const char *
output_64bit_ior (rtx *operands)
{
  unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
  int bs0, bs1, p, len;

  if (INTVAL (operands[2]) == 0)
    return "copy %1,%0";

  for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
      break;

  for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
      break;

  gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
              || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);

  p = 63 - bs0;
  len = bs1 - bs0;

  operands[2] = GEN_INT (p);
  operands[3] = GEN_INT (len);
  return "depdi -1,%2,%3,%0";
}

/* Target hook for assembling integer objects.  This code handles
   aligned SI and DI integers specially since function references
   must be preceded by P%.  */

static bool
pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  if (size == UNITS_PER_WORD
      && aligned_p
      && function_label_operand (x, VOIDmode))
    {
      fputs (size == 8 ? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
      output_addr_const (asm_out_file, x);
      fputc ('\n', asm_out_file);
      return true;
    }
  return default_assemble_integer (x, size, aligned_p);
}
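
/* For example, an aligned word-sized reference to function foo is
   emitted as "\t.word\tP%foo" (".dword" when UNITS_PER_WORD is 8);
   the P% prefix asks the assembler/linker for a function pointer
   (plabel) rather than a raw code address.  */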

/* Output an ascii string.  */
void
output_ascii (FILE *file, const char *p, int size)
{
  int i;
  int chars_output;
  unsigned char partial_output[16];     /* Max space 4 chars can occupy.  */

  /* The HP assembler can only take strings of 256 characters at one
     time.  This is a limitation on input line length, *not* the
     length of the string.  Sigh.  Even worse, it seems that the
     restriction is in number of input characters (see \xnn &
     \whatever).  So we have to do this very carefully.  */

  fputs ("\t.STRING \"", file);

  chars_output = 0;
  for (i = 0; i < size; i += 4)
    {
      int co = 0;
      int io = 0;
      for (io = 0, co = 0; io < MIN (4, size - i); io++)
        {
          register unsigned int c = (unsigned char) p[i + io];

          if (c == '\"' || c == '\\')
            partial_output[co++] = '\\';
          if (c >= ' ' && c < 0177)
            partial_output[co++] = c;
          else
            {
              unsigned int hexd;
              partial_output[co++] = '\\';
              partial_output[co++] = 'x';
              hexd = c / 16 + '0';
              if (hexd > '9')
                hexd -= '9' - 'a' + 1;
              partial_output[co++] = hexd;
              hexd = c % 16 + '0';
              if (hexd > '9')
                hexd -= '9' - 'a' + 1;
              partial_output[co++] = hexd;
            }
        }
      if (chars_output + co > 243)
        {
          fputs ("\"\n\t.STRING \"", file);
          chars_output = 0;
        }
      fwrite (partial_output, 1, (size_t) co, file);
      chars_output += co;
      co = 0;
    }
  fputs ("\"\n", file);
}
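
/* Worked example: the input bytes 'a', '"', 0xab are emitted as
        .STRING "a\"\xab"
   -- printable characters pass through, quote and backslash gain a
   backslash escape, and everything else becomes a two-digit \xnn
   escape (the hex-digit arithmetic above maps values 10-15 to
   'a'-'f').  */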

/* Try to rewrite floating point comparisons & branches to avoid
   useless add,tr insns.

   CHECK_NOTES is nonzero if we should examine REG_DEAD notes
   to see if FPCC is dead.  CHECK_NOTES is nonzero for the
   first attempt to remove useless add,tr insns.  It is zero
   for the second pass as reorg sometimes leaves bogus REG_DEAD
   notes lying around.

   When CHECK_NOTES is zero we can only eliminate add,tr insns
   when there's a 1:1 correspondence between fcmp and ftest/fbranch
   instructions.  */
static void
remove_useless_addtr_insns (int check_notes)
{
  rtx insn;
  static int pass = 0;

  /* This is fairly cheap, so always run it when optimizing.  */
  if (optimize > 0)
    {
      int fcmp_count = 0;
      int fbranch_count = 0;

      /* Walk all the insns in this function looking for fcmp & fbranch
         instructions.  Keep track of how many of each we find.  */
      for (insn = get_insns (); insn; insn = next_insn (insn))
        {
          rtx tmp;

          /* Ignore anything that isn't an INSN or a JUMP_INSN.  */
          if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
            continue;

          tmp = PATTERN (insn);

          /* It must be a set.  */
          if (GET_CODE (tmp) != SET)
            continue;

          /* If the destination is CCFP, then we've found an fcmp insn.  */
          tmp = SET_DEST (tmp);
          if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
            {
              fcmp_count++;
              continue;
            }

          tmp = PATTERN (insn);
          /* If this is an fbranch instruction, bump the fbranch counter.  */
          if (GET_CODE (tmp) == SET
              && SET_DEST (tmp) == pc_rtx
              && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
              && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
              && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
              && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
            {
              fbranch_count++;
              continue;
            }
        }

      /* Find all floating point compare + branch insns.  If possible,
         reverse the comparison & the branch to avoid add,tr insns.  */
      for (insn = get_insns (); insn; insn = next_insn (insn))
        {
          rtx tmp, next;

          /* Ignore anything that isn't an INSN.  */
          if (GET_CODE (insn) != INSN)
            continue;

          tmp = PATTERN (insn);

          /* It must be a set.  */
          if (GET_CODE (tmp) != SET)
            continue;

          /* The destination must be CCFP, which is register zero.  */
          tmp = SET_DEST (tmp);
          if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
            continue;

          /* INSN should be a set of CCFP.

             See if the result of this insn is used in a reversed FP
             conditional branch.  If so, reverse our condition and
             the branch.  Doing so avoids useless add,tr insns.  */
          next = next_insn (insn);
          while (next)
            {
              /* Jumps, calls and labels stop our search.  */
              if (GET_CODE (next) == JUMP_INSN
                  || GET_CODE (next) == CALL_INSN
                  || GET_CODE (next) == CODE_LABEL)
                break;

              /* As does another fcmp insn.  */
              if (GET_CODE (next) == INSN
                  && GET_CODE (PATTERN (next)) == SET
                  && GET_CODE (SET_DEST (PATTERN (next))) == REG
                  && REGNO (SET_DEST (PATTERN (next))) == 0)
                break;

              next = next_insn (next);
            }

          /* Is NEXT_INSN a branch?  */
          if (next
              && GET_CODE (next) == JUMP_INSN)
            {
              rtx pattern = PATTERN (next);

              /* If it is a reversed fp conditional branch (e.g. uses add,tr)
                 and CCFP dies, then reverse our conditional and the branch
                 to avoid the add,tr.  */
              if (GET_CODE (pattern) == SET
                  && SET_DEST (pattern) == pc_rtx
                  && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
                  && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
                  && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
                  && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
                  && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
                  && (fcmp_count == fbranch_count
                      || (check_notes
                          && find_regno_note (next, REG_DEAD, 0))))
                {
                  /* Reverse the branch.  */
                  tmp = XEXP (SET_SRC (pattern), 1);
                  XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
                  XEXP (SET_SRC (pattern), 2) = tmp;
                  INSN_CODE (next) = -1;

                  /* Reverse our condition.  */
                  tmp = PATTERN (insn);
                  PUT_CODE (XEXP (tmp, 1),
                            (reverse_condition_maybe_unordered
                             (GET_CODE (XEXP (tmp, 1)))));
                }
            }
        }
    }

  pass = !pass;
}
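
/* Sketch of the rewrite performed above: given an fcmp that sets CCFP
   with, say, LT, followed by a branch on (ne CCFP 0) whose "then" arm
   is pc (the reversed, add,tr-style form), the branch arms are swapped
   and the comparison code is replaced via
   reverse_condition_maybe_unordered, LT becoming UNGE, so the pair can
   be emitted as a plain fcmp/ftest/branch with no add,tr.  */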

/* You may have trouble believing this, but this is the 32 bit HP-PA
   stack layout.  Wow.

   Offset               Contents

   Variable arguments   (optional; any number may be allocated)

   SP-(4*(N+9))         arg word N
        :                   :
      SP-56             arg word 5
      SP-52             arg word 4

   Fixed arguments      (must be allocated; may remain unused)

      SP-48             arg word 3
      SP-44             arg word 2
      SP-40             arg word 1
      SP-36             arg word 0

   Frame Marker

      SP-32             External Data Pointer (DP)
      SP-28             External sr4
      SP-24             External/stub RP (RP')
      SP-20             Current RP
      SP-16             Static Link
      SP-12             Clean up
      SP-8              Calling Stub RP (RP'')
      SP-4              Previous SP

   Top of Frame

      SP-0              Stack Pointer (points to next available address)

*/

/* This function saves registers as follows.  Registers marked with ' are
   this function's registers (as opposed to the previous function's).
   If a frame_pointer isn't needed, r4 is saved as a general register;
   the space for the frame pointer is still allocated, though, to keep
   things simple.


   Top of Frame

       SP (FP')         Previous FP
       SP + 4           Alignment filler (sigh)
       SP + 8           Space for locals reserved here.
       .
       .
       .
       SP + n           All call-saved registers used.
       .
       .
       .
       SP + o           All call-saved fp registers used.
       .
       .
       .
       SP + p (SP')     points to next available address.

*/

/* Global variables set by output_function_prologue().  */
/* Size of frame.  Need to know this to emit return insns from
   leaf procedures.  */
static HOST_WIDE_INT actual_fsize, local_fsize;
static int save_fregs;

/* Emit RTL to store REG at the memory location specified by BASE+DISP.
   Handle case where DISP > 8k by using the add_high_const patterns.

   Note in DISP > 8k case, we will leave the high part of the address
   in %r1.  There is code in expand_hppa_{prologue,epilogue} that knows
   this.  */

static void
store_reg (int reg, HOST_WIDE_INT disp, int base)
{
  rtx insn, dest, src, basereg;

  src = gen_rtx_REG (word_mode, reg);
  basereg = gen_rtx_REG (Pmode, base);
  if (VAL_14_BITS_P (disp))
    {
      dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
      insn = emit_move_insn (dest, src);
    }
  else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
    {
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, delta);
      insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
      if (DO_FRAME_NOTES)
        {
          REG_NOTES (insn)
            = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
                gen_rtx_SET (VOIDmode, tmpreg,
                             gen_rtx_PLUS (Pmode, basereg, delta)),
                REG_NOTES (insn));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      dest = gen_rtx_MEM (word_mode, tmpreg);
      insn = emit_move_insn (dest, src);
    }
  else
    {
      rtx delta = GEN_INT (disp);
      rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, high);
      dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
      insn = emit_move_insn (dest, src);
      if (DO_FRAME_NOTES)
        {
          REG_NOTES (insn)
            = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
                gen_rtx_SET (VOIDmode,
                             gen_rtx_MEM (word_mode,
                                          gen_rtx_PLUS (word_mode, basereg,
                                                        delta)),
                             src),
                REG_NOTES (insn));
        }
    }

  if (DO_FRAME_NOTES)
    RTX_FRAME_RELATED_P (insn) = 1;
}
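
/* Rough sketch of the code generated for the three cases above on a
   32-bit target (mnemonics are illustrative): a displacement fitting
   14 bits gives a single "stw %reg,disp(%base)"; otherwise an
   "addil L'disp,%base" leaves base plus the high part of disp in %r1
   and the store becomes "stw %reg,R'disp(%r1)"; the middle case only
   arises on 64-bit targets, where a displacement wider than 32 bits
   is first loaded into %r1 in full.  */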

/* Emit RTL to store REG at the memory location specified by BASE and then
   add MOD to BASE.  MOD must be <= 8k.  */

static void
store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
{
  rtx insn, basereg, srcreg, delta;

  gcc_assert (VAL_14_BITS_P (mod));

  basereg = gen_rtx_REG (Pmode, base);
  srcreg = gen_rtx_REG (word_mode, reg);
  delta = GEN_INT (mod);

  insn = emit_insn (gen_post_store (basereg, srcreg, delta));
  if (DO_FRAME_NOTES)
    {
      RTX_FRAME_RELATED_P (insn) = 1;

      /* RTX_FRAME_RELATED_P must be set on each frame related set
         in a parallel with more than one element.  */
      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
    }
}

/* Emit RTL to set REG to the value specified by BASE+DISP.  Handle case
   where DISP > 8k by using the add_high_const patterns.  NOTE indicates
   whether to add a frame note or not.

   In the DISP > 8k case, we leave the high part of the address in %r1.
   There is code in expand_hppa_{prologue,epilogue} that knows about this.  */

static void
set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
{
  rtx insn;

  if (VAL_14_BITS_P (disp))
    {
      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
                             plus_constant (gen_rtx_REG (Pmode, base), disp));
    }
  else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
    {
      rtx basereg = gen_rtx_REG (Pmode, base);
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, delta);
      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
                             gen_rtx_PLUS (Pmode, tmpreg, basereg));
      if (DO_FRAME_NOTES)
        REG_NOTES (insn)
          = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
              gen_rtx_SET (VOIDmode, tmpreg,
                           gen_rtx_PLUS (Pmode, basereg, delta)),
              REG_NOTES (insn));
    }
  else
    {
      rtx basereg = gen_rtx_REG (Pmode, base);
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg,
                      gen_rtx_PLUS (Pmode, basereg,
                                    gen_rtx_HIGH (Pmode, delta)));
      insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
                             gen_rtx_LO_SUM (Pmode, tmpreg, delta));
    }

  if (DO_FRAME_NOTES && note)
    RTX_FRAME_RELATED_P (insn) = 1;
}

HOST_WIDE_INT
compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
{
  int freg_saved = 0;
  int i, j;

  /* The code in hppa_expand_prologue and hppa_expand_epilogue must
     be consistent with the rounding and size calculation done here.
     Change them at the same time.  */

  /* We do our own stack alignment.  First, round the size of the
     stack locals up to a word boundary.  */
  size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);

  /* Space for previous frame pointer + filler.  If any frame is
     allocated, we need to add in the STARTING_FRAME_OFFSET.  We
     waste some space here for the sake of HP compatibility.  The
     first slot is only used when the frame pointer is needed.  */
  if (size || frame_pointer_needed)
    size += STARTING_FRAME_OFFSET;

  /* If the current function calls __builtin_eh_return, then we need
     to allocate stack space for registers that will hold data for
     the exception handler.  */
  if (DO_FRAME_NOTES && current_function_calls_eh_return)
    {
      unsigned int i;

      for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
        continue;
      size += i * UNITS_PER_WORD;
    }

  /* Account for space used by the callee general register saves.  */
  for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
    if (regs_ever_live[i])
      size += UNITS_PER_WORD;

  /* Account for space used by the callee floating point register saves.  */
  for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
    if (regs_ever_live[i]
        || (!TARGET_64BIT && regs_ever_live[i + 1]))
      {
        freg_saved = 1;

        /* We always save both halves of the FP register, so always
           increment the frame size by 8 bytes.  */
        size += 8;
      }

  /* If any of the floating registers are saved, account for the
     alignment needed for the floating point register save block.  */
  if (freg_saved)
    {
      size = (size + 7) & ~7;
      if (fregs_live)
        *fregs_live = 1;
    }

  /* The various ABIs include space for the outgoing parameters in the
     size of the current function's stack frame.  We don't need to align
     for the outgoing arguments as their alignment is set by the final
     rounding for the frame as a whole.  */
  size += current_function_outgoing_args_size;

  /* Allocate space for the fixed frame marker.  This space must be
     allocated for any function that makes calls or allocates
     stack space.  */
  if (!current_function_is_leaf || size)
    size += TARGET_64BIT ? 48 : 32;

  /* Finally, round to the preferred stack boundary.  */
  return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
          & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
}
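
/* Worked example, assuming the 32-bit values STARTING_FRAME_OFFSET = 8
   and PREFERRED_STACK_BOUNDARY = 512 bits: a non-leaf function with
   100 bytes of locals, one callee GR save, no FP saves, and 16 bytes
   of outgoing arguments accumulates 100 + 8 + 4 + 16 + 32 = 160 bytes,
   which the final rounding pads to a 192-byte (64-byte aligned)
   frame.  */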

/* Generate the assembly code for function entry.  FILE is a stdio
   stream to output the code to.  SIZE is an int: how many units of
   temporary storage to allocate.

   Refer to the array `regs_ever_live' to determine which registers to
   save; `regs_ever_live[I]' is nonzero if register number I is ever
   used in the function.  This function is responsible for knowing
   which registers should not be saved even if used.  */

/* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
   of memory.  If any fpu reg is used in the function, we allocate
   such a block here, at the bottom of the frame, just in case it's needed.

   If this function is a leaf procedure, then we may choose not
   to do a "save" insn.  The decision about whether or not
   to do this is made in regclass.c.  */

static void
pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  /* The function's label and associated .PROC must never be
     separated and must be output *after* any profiling declarations
     to avoid changing spaces/subspaces within a procedure.  */
  ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
  fputs ("\t.PROC\n", file);

  /* hppa_expand_prologue does the dirty work now.  We just need
     to output the assembler directives which denote the start
     of a function.  */
  fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
  if (regs_ever_live[2])
    fputs (",CALLS,SAVE_RP", file);
  else
    fputs (",NO_CALLS", file);

  /* The SAVE_SP flag is used to indicate that register %r3 is stored
     at the beginning of the frame and that it is used as the frame
     pointer for the frame.  We do this because our current frame
     layout doesn't conform to that specified in the HP runtime
     documentation and we need a way to indicate to programs such as
     GDB where %r3 is saved.  The SAVE_SP flag was chosen because it
     isn't used by HP compilers but is supported by the assembler.
     However, SAVE_SP is supposed to indicate that the previous stack
     pointer has been saved in the frame marker.  */
  if (frame_pointer_needed)
    fputs (",SAVE_SP", file);

  /* Pass on information about the number of callee register saves
     performed in the prologue.

     The compiler is supposed to pass the highest register number
     saved; the assembler then has to adjust that number before
     entering it into the unwind descriptor (to account for any
     caller saved registers with lower register numbers than the
     first callee saved register).  */
  if (gr_saved)
    fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);

  if (fr_saved)
    fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);

  fputs ("\n\t.ENTRY\n", file);

  remove_useless_addtr_insns (0);
}

void
hppa_expand_prologue (void)
{
  int merge_sp_adjust_with_store = 0;
  HOST_WIDE_INT size = get_frame_size ();
  HOST_WIDE_INT offset;
  int i;
  rtx insn, tmpreg;

  gr_saved = 0;
  fr_saved = 0;
  save_fregs = 0;

  /* Compute total size for frame pointer, filler, locals and rounding to
     the next word boundary.  Similar code appears in compute_frame_size
     and must be changed in tandem with this code.  */
  local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
  if (local_fsize || frame_pointer_needed)
    local_fsize += STARTING_FRAME_OFFSET;

  actual_fsize = compute_frame_size (size, &save_fregs);

  /* Compute a few things we will use often.  */
  tmpreg = gen_rtx_REG (word_mode, 1);

  /* Save RP first.  The calling conventions manual states RP will
     always be stored into the caller's frame at sp - 20 or sp - 16
     depending on which ABI is in use.  */
  if (regs_ever_live[2] || current_function_calls_eh_return)
    store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);

  /* Allocate the local frame and set up the frame pointer if needed.  */
  if (actual_fsize != 0)
    {
      if (frame_pointer_needed)
        {
          /* Copy the old frame pointer temporarily into %r1.  Set up the
             new stack pointer, then store away the saved old frame pointer
             into the stack at sp and at the same time update the stack
             pointer by actual_fsize bytes.  Two versions, first
             handles small (<8k) frames.  The second handles large (>=8k)
             frames.  */
          insn = emit_move_insn (tmpreg, frame_pointer_rtx);
          if (DO_FRAME_NOTES)
            RTX_FRAME_RELATED_P (insn) = 1;

          insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
          if (DO_FRAME_NOTES)
            RTX_FRAME_RELATED_P (insn) = 1;

          if (VAL_14_BITS_P (actual_fsize))
            store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
          else
            {
              /* It is incorrect to store the saved frame pointer at *sp,
                 then increment sp (writes beyond the current stack boundary).

                 So instead use stwm to store at *sp and post-increment the
                 stack pointer as an atomic operation.  Then increment sp to
                 finish allocating the new frame.  */
              HOST_WIDE_INT adjust1 = 8192 - 64;
              HOST_WIDE_INT adjust2 = actual_fsize - adjust1;

              store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
              set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
                              adjust2, 1);
            }

          /* We set SAVE_SP in frames that need a frame pointer.  Thus,
             we need to store the previous stack pointer (frame pointer)
             into the frame marker on targets that use the HP unwind
             library.  This allows the HP unwind library to be used to
             unwind GCC frames.  However, we are not fully compatible
             with the HP library because our frame layout differs from
             that specified in the HP runtime specification.

             We don't want a frame note on this instruction as the frame
             marker moves during dynamic stack allocation.

             This instruction also serves as a blockage to prevent
             register spills from being scheduled before the stack
             pointer is raised.  This is necessary as we store
             registers using the frame pointer as a base register,
             and the frame pointer is set before sp is raised.  */
          if (TARGET_HPUX_UNWIND_LIBRARY)
            {
              rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
                                       GEN_INT (TARGET_64BIT ? -8 : -4));

              emit_move_insn (gen_rtx_MEM (word_mode, addr),
                              frame_pointer_rtx);
            }
          else
            emit_insn (gen_blockage ());
        }
      /* No frame pointer needed.  */
      else
        {
          /* In some cases we can perform the first callee register save
             and allocate the stack frame at the same time.  If so, just
             make a note of it and defer allocating the frame until saving
             the callee registers.  */
          if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
            merge_sp_adjust_with_store = 1;
          /* Cannot optimize.  Adjust the stack frame by actual_fsize
             bytes.  */
          else
            set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
                            actual_fsize, 1);
        }
    }

  /* Normal register save.

     Do not save the frame pointer in the frame_pointer_needed case.  It
     was done earlier.  */
  if (frame_pointer_needed)
    {
      offset = local_fsize;

      /* Saving the EH return data registers in the frame is the simplest
         way to get the frame unwind information emitted.  We put them
         just before the general registers.  */
      if (DO_FRAME_NOTES && current_function_calls_eh_return)
        {
          unsigned int i, regno;

          for (i = 0; ; ++i)
            {
              regno = EH_RETURN_DATA_REGNO (i);
              if (regno == INVALID_REGNUM)
                break;

              store_reg (regno, offset, FRAME_POINTER_REGNUM);
              offset += UNITS_PER_WORD;
            }
        }

      for (i = 18; i >= 4; i--)
        if (regs_ever_live[i] && ! call_used_regs[i])
          {
            store_reg (i, offset, FRAME_POINTER_REGNUM);
            offset += UNITS_PER_WORD;
            gr_saved++;
          }
      /* Account for %r3 which is saved in a special place.  */
      gr_saved++;
    }
  /* No frame pointer needed.  */
  else
    {
      offset = local_fsize - actual_fsize;

      /* Saving the EH return data registers in the frame is the simplest
         way to get the frame unwind information emitted.  */
      if (DO_FRAME_NOTES && current_function_calls_eh_return)
        {
          unsigned int i, regno;

          for (i = 0; ; ++i)
            {
              regno = EH_RETURN_DATA_REGNO (i);
              if (regno == INVALID_REGNUM)
                break;

              /* If merge_sp_adjust_with_store is nonzero, then we can
                 optimize the first save.  */
              if (merge_sp_adjust_with_store)
                {
                  store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
                  merge_sp_adjust_with_store = 0;
                }
              else
                store_reg (regno, offset, STACK_POINTER_REGNUM);
              offset += UNITS_PER_WORD;
            }
        }

      for (i = 18; i >= 3; i--)
        if (regs_ever_live[i] && ! call_used_regs[i])
          {
            /* If merge_sp_adjust_with_store is nonzero, then we can
               optimize the first GR save.  */
            if (merge_sp_adjust_with_store)
              {
                store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
                merge_sp_adjust_with_store = 0;
              }
            else
              store_reg (i, offset, STACK_POINTER_REGNUM);
            offset += UNITS_PER_WORD;
            gr_saved++;
          }

      /* If we wanted to merge the SP adjustment with a GR save, but we never
         did any GR saves, then just emit the adjustment here.  */
      if (merge_sp_adjust_with_store)
        set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
                        actual_fsize, 1);
    }

  /* The hppa calling conventions say that %r19, the pic offset
     register, is saved at sp - 32 (in this function's frame)
     when generating PIC code.  FIXME:  What is the correct thing
     to do for functions which make no calls and allocate no
     frame?  Do we need to allocate a frame, or can we just omit
     the save?  For now we'll just omit the save.

     We don't want a note on this insn as the frame marker can
     move if there is a dynamic stack allocation.  */
  if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
    {
      rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));

      emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
    }

  /* Align pointer properly (doubleword boundary).  */
  offset = (offset + 7) & ~7;

  /* Floating point register store.  */
  if (save_fregs)
    {
      rtx base;

      /* First get the frame or stack pointer to the start of the FP register
         save area.  */
      if (frame_pointer_needed)
        {
          set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
          base = frame_pointer_rtx;
        }
      else
        {
          set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
          base = stack_pointer_rtx;
        }

      /* Now actually save the FP registers.  */
      for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
        {
          if (regs_ever_live[i]
              || (! TARGET_64BIT && regs_ever_live[i + 1]))
            {
              rtx addr, insn, reg;
              addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
              reg = gen_rtx_REG (DFmode, i);
              insn = emit_move_insn (addr, reg);
              if (DO_FRAME_NOTES)
                {
                  RTX_FRAME_RELATED_P (insn) = 1;
                  if (TARGET_64BIT)
                    {
                      rtx mem = gen_rtx_MEM (DFmode,
                                             plus_constant (base, offset));
                      REG_NOTES (insn)
                        = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
                                             gen_rtx_SET (VOIDmode, mem, reg),
                                             REG_NOTES (insn));
                    }
                  else
                    {
                      rtx meml = gen_rtx_MEM (SFmode,
                                              plus_constant (base, offset));
                      rtx memr = gen_rtx_MEM (SFmode,
                                              plus_constant (base, offset + 4));
                      rtx regl = gen_rtx_REG (SFmode, i);
                      rtx regr = gen_rtx_REG (SFmode, i + 1);
                      rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
                      rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
                      rtvec vec;

                      RTX_FRAME_RELATED_P (setl) = 1;
                      RTX_FRAME_RELATED_P (setr) = 1;
                      vec = gen_rtvec (2, setl, setr);
                      REG_NOTES (insn)
                        = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
                                             gen_rtx_SEQUENCE (VOIDmode, vec),
                                             REG_NOTES (insn));
                    }
                }
              offset += GET_MODE_SIZE (DFmode);
              fr_saved++;
            }
        }
    }
}
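
/* Worked example for the large-frame path above: with actual_fsize =
   20000, adjust1 = 8192 - 64 = 8128 still fits a 14-bit displacement,
   so the old frame pointer is stored with a single post-modify store
   that also bumps %sp by 8128, and the remaining adjust2 = 11872 is
   then added to %sp; the store therefore never writes beyond the
   allocated stack.  */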

/* Emit RTL to load REG from the memory location specified by BASE+DISP.
   Handle case where DISP > 8k by using the add_high_const patterns.  */

static void
load_reg (int reg, HOST_WIDE_INT disp, int base)
{
  rtx dest = gen_rtx_REG (word_mode, reg);
  rtx basereg = gen_rtx_REG (Pmode, base);
  rtx src;

  if (VAL_14_BITS_P (disp))
    src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
  else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
    {
      rtx delta = GEN_INT (disp);
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, delta);
      if (TARGET_DISABLE_INDEXING)
        {
          emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
          src = gen_rtx_MEM (word_mode, tmpreg);
        }
      else
        src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
    }
  else
    {
      rtx delta = GEN_INT (disp);
      rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
      rtx tmpreg = gen_rtx_REG (Pmode, 1);

      emit_move_insn (tmpreg, high);
      src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
    }

  emit_move_insn (dest, src);
}

/* Update the total code bytes output to the text section.  */

static void
update_total_code_bytes (int nbytes)
{
  if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
      && !IN_NAMED_SECTION_P (cfun->decl))
    {
      if (INSN_ADDRESSES_SET_P ())
        {
          unsigned long old_total = total_code_bytes;

          total_code_bytes += nbytes;

          /* Be prepared to handle overflows.  */
          if (old_total > total_code_bytes)
            total_code_bytes = -1;
        }
      else
        total_code_bytes = -1;
    }
}

/* This function generates the assembly code for function exit.
   Args are as for output_function_prologue ().

   The function epilogue should not depend on the current stack
   pointer!  It should use the frame pointer only.  This is mandatory
   because of alloca; we also take advantage of it to omit stack
   adjustments before returning.  */

static void
pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  rtx insn = get_last_insn ();

  last_address = 0;

  /* hppa_expand_epilogue does the dirty work now.  We just need
     to output the assembler directives which denote the end
     of a function.

     To make debuggers happy, emit a nop if the epilogue was completely
     eliminated due to a volatile call as the last insn in the
     current function.  That way the return address (in %r2) will
     always point to a valid instruction in the current function.  */

  /* Get the last real insn.  */
  if (GET_CODE (insn) == NOTE)
    insn = prev_real_insn (insn);

  /* If it is a sequence, then look inside.  */
  if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
    insn = XVECEXP (PATTERN (insn), 0, 0);

  /* If insn is a CALL_INSN, then it must be a call to a volatile
     function (otherwise there would be epilogue insns).  */
  if (insn && GET_CODE (insn) == CALL_INSN)
    {
      fputs ("\tnop\n", file);
      last_address += 4;
    }

  fputs ("\t.EXIT\n\t.PROCEND\n", file);

  if (TARGET_SOM && TARGET_GAS)
    {
      /* We're done with this subspace except possibly for some additional
         debug information.  Forget that we are in this subspace to ensure
         that the next function is output in its own subspace.  */
      in_section = NULL;
      cfun->machine->in_nsubspa = 2;
    }

  if (INSN_ADDRESSES_SET_P ())
    {
      insn = get_last_nonnote_insn ();
      last_address += INSN_ADDRESSES (INSN_UID (insn));
      if (INSN_P (insn))
        last_address += insn_default_length (insn);
      last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
                      & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
    }

  /* Finally, update the total number of code bytes output so far.  */
  update_total_code_bytes (last_address);
}

void
hppa_expand_epilogue (void)
{
  rtx tmpreg;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT ret_off = 0;
  int i;
  int merge_sp_adjust_with_load = 0;

  /* We will use this often.  */
  tmpreg = gen_rtx_REG (word_mode, 1);

  /* Try to restore RP early to avoid load/use interlocks when
     RP gets used in the return (bv) instruction.  This appears to still
     be necessary even when we schedule the prologue and epilogue.  */
  if (regs_ever_live[2] || current_function_calls_eh_return)
    {
      ret_off = TARGET_64BIT ? -16 : -20;
      if (frame_pointer_needed)
        {
          load_reg (2, ret_off, FRAME_POINTER_REGNUM);
          ret_off = 0;
        }
      else
        {
          /* No frame pointer, and stack is smaller than 8k.  */
          if (VAL_14_BITS_P (ret_off - actual_fsize))
            {
              load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
              ret_off = 0;
            }
        }
    }

  /* General register restores.  */
  if (frame_pointer_needed)
    {
      offset = local_fsize;

      /* If the current function calls __builtin_eh_return, then we need
         to restore the saved EH data registers.  */
      if (DO_FRAME_NOTES && current_function_calls_eh_return)
        {
          unsigned int i, regno;

          for (i = 0; ; ++i)
            {
              regno = EH_RETURN_DATA_REGNO (i);
              if (regno == INVALID_REGNUM)
                break;

              load_reg (regno, offset, FRAME_POINTER_REGNUM);
              offset += UNITS_PER_WORD;
            }
        }

      for (i = 18; i >= 4; i--)
        if (regs_ever_live[i] && ! call_used_regs[i])
          {
            load_reg (i, offset, FRAME_POINTER_REGNUM);
            offset += UNITS_PER_WORD;
          }
    }
  else
    {
      offset = local_fsize - actual_fsize;

      /* If the current function calls __builtin_eh_return, then we need
         to restore the saved EH data registers.  */
      if (DO_FRAME_NOTES && current_function_calls_eh_return)
        {
          unsigned int i, regno;

          for (i = 0; ; ++i)
            {
              regno = EH_RETURN_DATA_REGNO (i);
              if (regno == INVALID_REGNUM)
                break;

              /* Only for the first load.
                 merge_sp_adjust_with_load holds the register load
                 with which we will merge the sp adjustment.  */
              if (merge_sp_adjust_with_load == 0
                  && local_fsize == 0
                  && VAL_14_BITS_P (-actual_fsize))
                merge_sp_adjust_with_load = regno;
              else
                load_reg (regno, offset, STACK_POINTER_REGNUM);
              offset += UNITS_PER_WORD;
            }
        }

      for (i = 18; i >= 3; i--)
        {
          if (regs_ever_live[i] && ! call_used_regs[i])
            {
              /* Only for the first load.
                 merge_sp_adjust_with_load holds the register load
                 with which we will merge the sp adjustment.  */
              if (merge_sp_adjust_with_load == 0
                  && local_fsize == 0
                  && VAL_14_BITS_P (-actual_fsize))
                merge_sp_adjust_with_load = i;
              else
                load_reg (i, offset, STACK_POINTER_REGNUM);
              offset += UNITS_PER_WORD;
            }
        }
    }

  /* Align pointer properly (doubleword boundary).  */
  offset = (offset + 7) & ~7;

  /* FP register restores.  */
  if (save_fregs)
    {
      /* Adjust the register to index off of.  */
      if (frame_pointer_needed)
        set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset, 0);
      else
        set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);

      /* Actually do the restores now.  */
      for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
        if (regs_ever_live[i]
            || (! TARGET_64BIT && regs_ever_live[i + 1]))
          {
            rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
            rtx dest = gen_rtx_REG (DFmode, i);
            emit_move_insn (dest, src);
          }
    }

  /* Emit a blockage insn here to keep these insns from being moved to
     an earlier spot in the epilogue, or into the main instruction stream.

     This is necessary as we must not cut the stack back before all the
     restores are finished.  */
  emit_insn (gen_blockage ());

  /* Reset stack pointer (and possibly frame pointer).  The stack
     pointer is initially set to fp + 64 to avoid a race condition.  */
  if (frame_pointer_needed)
    {
      rtx delta = GEN_INT (-64);

      set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64, 0);
      emit_insn (gen_pre_load (frame_pointer_rtx, stack_pointer_rtx, delta));
    }
  /* If we were deferring a callee register restore, do it now.  */
  else if (merge_sp_adjust_with_load)
    {
      rtx delta = GEN_INT (-actual_fsize);
      rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);

      emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
    }
  else if (actual_fsize != 0)
    set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
                    - actual_fsize, 0);

  /* If we haven't restored %r2 yet (no frame pointer, and a stack
     frame greater than 8k), do so now.  */
  if (ret_off != 0)
    load_reg (2, ret_off, STACK_POINTER_REGNUM);

  if (DO_FRAME_NOTES && current_function_calls_eh_return)
    {
      rtx sa = EH_RETURN_STACKADJ_RTX;

      emit_insn (gen_blockage ());
      emit_insn (TARGET_64BIT
                 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
                 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
    }
}

rtx
hppa_pic_save_rtx (void)
{
  return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
}

#ifndef NO_DEFERRED_PROFILE_COUNTERS
#define NO_DEFERRED_PROFILE_COUNTERS 0
#endif

/* Define heap vector type for funcdef numbers.  */
DEF_VEC_I(int);
DEF_VEC_ALLOC_I(int,heap);

/* Vector of funcdef numbers.  */
static VEC(int,heap) *funcdef_nos;

/* Output deferred profile counters.  */
static void
output_deferred_profile_counters (void)
{
  unsigned int i;
  int align, n;

  if (VEC_empty (int, funcdef_nos))
    return;

  switch_to_section (data_section);
  align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
  ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));

  for (i = 0; VEC_iterate (int, funcdef_nos, i, n); i++)
    {
      targetm.asm_out.internal_label (asm_out_file, "LP", n);
      assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
    }

  VEC_free (int, heap, funcdef_nos);
}

void
hppa_profile_hook (int label_no)
{
  /* We use SImode for the address of the function in both 32 and
     64-bit code to avoid having to provide DImode versions of the
     lcla2 and load_offset_label_address insn patterns.  */
  rtx reg = gen_reg_rtx (SImode);
  rtx label_rtx = gen_label_rtx ();
  rtx begin_label_rtx, call_insn;
  char begin_label_name[16];

  ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
                               label_no);
  begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));

  if (TARGET_64BIT)
    emit_move_insn (arg_pointer_rtx,
                    gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
                                  GEN_INT (64)));

  emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));

  /* The address of the function is loaded into %r25 with an instruction-
     relative sequence that avoids the use of relocations.  The sequence
     is split so that the load_offset_label_address instruction can
     occupy the delay slot of the call to _mcount.  */
  if (TARGET_PA_20)
    emit_insn (gen_lcla2 (reg, label_rtx));
  else
    emit_insn (gen_lcla1 (reg, label_rtx));

  emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
                                            reg, begin_label_rtx, label_rtx));

#if !NO_DEFERRED_PROFILE_COUNTERS
  {
    rtx count_label_rtx, addr, r24;
    char count_label_name[16];

    VEC_safe_push (int, heap, funcdef_nos, label_no);
    ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
    count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));

    addr = force_reg (Pmode, count_label_rtx);
    r24 = gen_rtx_REG (Pmode, 24);
    emit_move_insn (r24, addr);

    call_insn =
      emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
                                             gen_rtx_SYMBOL_REF (Pmode,
                                                                 "_mcount")),
                                GEN_INT (TARGET_64BIT ? 24 : 12)));

    use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
  }
#else

  call_insn =
    emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
                                           gen_rtx_SYMBOL_REF (Pmode,
                                                               "_mcount")),
                              GEN_INT (TARGET_64BIT ? 16 : 8)));

#endif

  use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
  use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));

  /* Indicate the _mcount call cannot throw, nor will it execute a
     non-local goto.  */
  REG_NOTES (call_insn)
    = gen_rtx_EXPR_LIST (REG_EH_REGION, constm1_rtx, REG_NOTES (call_insn));
}

/* Fetch the return address for the frame COUNT steps up from
   the current frame, after the prologue.  FRAMEADDR is the
   frame pointer of the COUNT frame.

   We want to ignore any export stub remnants here.  To handle this,
   we examine the code at the return address, and if it is an export
   stub, we return a memory rtx for the stub return address stored
   at frame-24.

   The value returned is used in two different ways:

        1. To find a function's caller.

        2. To change the return address for a function.

   This function handles most instances of case 1; however, it will
   fail if there are two levels of stubs to execute on the return
   path.  The only way I believe that can happen is if the return value
   needs a parameter relocation, which never happens for C code.

   This function handles most instances of case 2; however, it will
   fail if we did not originally have stub code on the return path
   but will need stub code on the new return path.  This can happen if
   the caller & callee are both in the main program, but the new
   return location is in a shared library.  */

rtx
return_addr_rtx (int count, rtx frameaddr)
{
  rtx label;
  rtx rp;
  rtx saved_rp;
  rtx ins;

  if (count != 0)
    return NULL_RTX;

  rp = get_hard_reg_initial_val (Pmode, 2);

  if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
    return rp;

  saved_rp = gen_reg_rtx (Pmode);
  emit_move_insn (saved_rp, rp);

  /* Get pointer to the instruction stream.  We have to mask out the
     privilege level from the two low order bits of the return address
     pointer here so that ins will point to the start of the first
     instruction that would have been executed if we returned.  */
  ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
  label = gen_label_rtx ();

  /* Check the instruction stream at the normal return address for the
     export stub:

        0x4bc23fd1 | stub+8:   ldw -18(sr0,sp),rp
        0x004010a1 | stub+12:  ldsid (sr0,rp),r1
        0x00011820 | stub+16:  mtsp r1,sr0
        0xe0400002 | stub+20:  be,n 0(sr0,rp)

     If it is an export stub, then our return address is really in
     -24[frameaddr].  */

  emit_cmp_insn (gen_rtx_MEM (SImode, ins), GEN_INT (0x4bc23fd1), NE,
                 NULL_RTX, SImode, 1);
  emit_jump_insn (gen_bne (label));

  emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)),
                 GEN_INT (0x004010a1), NE, NULL_RTX, SImode, 1);
  emit_jump_insn (gen_bne (label));

  emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)),
                 GEN_INT (0x00011820), NE, NULL_RTX, SImode, 1);
  emit_jump_insn (gen_bne (label));

  /* 0xe0400002 must be specified as -532676606 so that it won't be
     rejected as an invalid immediate operand on 64-bit hosts.  */
  emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)),
                 GEN_INT (-532676606), NE, NULL_RTX, SImode, 1);

  /* If there is no export stub then just use the value saved from
     the return pointer register.  */

  emit_jump_insn (gen_bne (label));

  /* Here we know that our return address points to an export
     stub.  We don't want to return the address of the export stub,
     but rather the return address of the export stub.  That return
     address is stored at -24[frameaddr].  */

  emit_move_insn (saved_rp,
                  gen_rtx_MEM (Pmode,
                               memory_address (Pmode,
                                               plus_constant (frameaddr,
                                                              -24))));

  emit_label (label);
  return saved_rp;
}
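
/* Arithmetic behind the comment above: 0xe0400002 is 3762290690 as an
   unsigned 32-bit value, which exceeds INT_MAX, so as a signed 32-bit
   immediate it must be written 3762290690 - 2^32 = -532676606.  The
   other three stub words are below 2^31 and can be given in hex.  */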
4370
 
4371
/* This is only valid once reload has completed because it depends on
   knowing exactly how much (if any) frame there is and...

   It's only valid if there is no frame marker to de-allocate and...

   It's only valid if %r2 hasn't been saved into the caller's frame
   (we're not profiling and %r2 isn't live anywhere).  */
int
hppa_can_use_return_insn_p (void)
{
  return (reload_completed
          && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
          && ! regs_ever_live[2]
          && ! frame_pointer_needed);
}

void
emit_bcond_fp (enum rtx_code code, rtx operand0)
{
  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
                               gen_rtx_IF_THEN_ELSE (VOIDmode,
                                                     gen_rtx_fmt_ee (code,
                                                              VOIDmode,
                                                              gen_rtx_REG (CCFPmode, 0),
                                                              const0_rtx),
                                                     gen_rtx_LABEL_REF (VOIDmode, operand0),
                                                     pc_rtx)));
}

rtx
gen_cmp_fp (enum rtx_code code, rtx operand0, rtx operand1)
{
  return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
                      gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1));
}

/* Adjust the cost of a scheduling dependency.  Return the new cost of
   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */

static int
pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type attr_type;

  /* Don't adjust costs for a pa8000 chip; also do not adjust any
     true dependencies, as they are described with bypasses now.  */
  if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
    return cost;

  if (! recog_memoized (insn))
    return 0;

  attr_type = get_attr_type (insn);

  switch (REG_NOTE_KIND (link))
    {
    case REG_DEP_ANTI:
      /* Anti dependency; DEP_INSN reads a register that INSN writes some
         cycles later.  */

      if (attr_type == TYPE_FPLOAD)
        {
          rtx pat = PATTERN (insn);
          rtx dep_pat = PATTERN (dep_insn);
          if (GET_CODE (pat) == PARALLEL)
            {
              /* This happens for the fldXs,mb patterns.  */
              pat = XVECEXP (pat, 0, 0);
            }
          if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
            /* If this happens, we have to extend this to schedule
               optimally.  Return 0 for now.  */
            return 0;

          if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
            {
              if (! recog_memoized (dep_insn))
                return 0;
              switch (get_attr_type (dep_insn))
                {
                case TYPE_FPALU:
                case TYPE_FPMULSGL:
                case TYPE_FPMULDBL:
                case TYPE_FPDIVSGL:
                case TYPE_FPDIVDBL:
                case TYPE_FPSQRTSGL:
                case TYPE_FPSQRTDBL:
                  /* A fpload can't be issued until one cycle before a
                     preceding arithmetic operation has finished if
                     the target of the fpload is any of the sources
                     (or destination) of the arithmetic operation.  */
                  return insn_default_latency (dep_insn) - 1;

                default:
                  return 0;
                }
            }
        }
      else if (attr_type == TYPE_FPALU)
        {
          rtx pat = PATTERN (insn);
          rtx dep_pat = PATTERN (dep_insn);
          if (GET_CODE (pat) == PARALLEL)
            {
              /* This happens for the fldXs,mb patterns.  */
              pat = XVECEXP (pat, 0, 0);
            }
          if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
            /* If this happens, we have to extend this to schedule
               optimally.  Return 0 for now.  */
            return 0;

          if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
            {
              if (! recog_memoized (dep_insn))
                return 0;
              switch (get_attr_type (dep_insn))
                {
                case TYPE_FPDIVSGL:
                case TYPE_FPDIVDBL:
                case TYPE_FPSQRTSGL:
                case TYPE_FPSQRTDBL:
                  /* An ALU flop can't be issued until two cycles before a
                     preceding divide or sqrt operation has finished if
                     the target of the ALU flop is any of the sources
                     (or destination) of the divide or sqrt operation.  */
                  return insn_default_latency (dep_insn) - 2;

                default:
                  return 0;
                }
            }
        }

      /* For other anti dependencies, the cost is 0.  */
      return 0;

    case REG_DEP_OUTPUT:
      /* Output dependency; DEP_INSN writes a register that INSN writes some
         cycles later.  */
      if (attr_type == TYPE_FPLOAD)
        {
          rtx pat = PATTERN (insn);
          rtx dep_pat = PATTERN (dep_insn);
          if (GET_CODE (pat) == PARALLEL)
            {
              /* This happens for the fldXs,mb patterns.  */
              pat = XVECEXP (pat, 0, 0);
            }
          if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
            /* If this happens, we have to extend this to schedule
               optimally.  Return 0 for now.  */
            return 0;

          if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
            {
              if (! recog_memoized (dep_insn))
                return 0;
              switch (get_attr_type (dep_insn))
                {
                case TYPE_FPALU:
                case TYPE_FPMULSGL:
                case TYPE_FPMULDBL:
                case TYPE_FPDIVSGL:
                case TYPE_FPDIVDBL:
                case TYPE_FPSQRTSGL:
                case TYPE_FPSQRTDBL:
                  /* A fpload can't be issued until one cycle before a
                     preceding arithmetic operation has finished if
                     the target of the fpload is the destination of the
                     arithmetic operation.

                     Exception: For PA7100LC, PA7200 and PA7300, the cost
                     is 3 cycles, unless they bundle together.  We also
                     pay the penalty if the second insn is a fpload.  */
                  return insn_default_latency (dep_insn) - 1;

                default:
                  return 0;
                }
            }
        }
      else if (attr_type == TYPE_FPALU)
        {
          rtx pat = PATTERN (insn);
          rtx dep_pat = PATTERN (dep_insn);
          if (GET_CODE (pat) == PARALLEL)
            {
              /* This happens for the fldXs,mb patterns.  */
              pat = XVECEXP (pat, 0, 0);
            }
          if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
            /* If this happens, we have to extend this to schedule
               optimally.  Return 0 for now.  */
            return 0;

          if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
            {
              if (! recog_memoized (dep_insn))
                return 0;
              switch (get_attr_type (dep_insn))
                {
                case TYPE_FPDIVSGL:
                case TYPE_FPDIVDBL:
                case TYPE_FPSQRTSGL:
                case TYPE_FPSQRTDBL:
                  /* An ALU flop can't be issued until two cycles before a
                     preceding divide or sqrt operation has finished if
                     the target of the ALU flop is also the target of
                     the divide or sqrt operation.  */
                  return insn_default_latency (dep_insn) - 2;

                default:
                  return 0;
                }
            }
        }

      /* For other output dependencies, the cost is 0.  */
      return 0;

    default:
      gcc_unreachable ();
    }
}

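/* Worked example for the adjustments above (latency value assumed for
   illustration): if DEP_INSN is an FPALU insn with a default latency of
   three cycles and INSN is an fpload whose target appears among the
   FPALU sources, the anti-dependence cost becomes 3 - 1 = 2, letting
   the fpload issue one cycle before the arithmetic result is ready.  */
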
/* Adjust scheduling priorities.  We use this to try to keep addil
   and the next use of %r1 close together.  */
static int
pa_adjust_priority (rtx insn, int priority)
{
  rtx set = single_set (insn);
  rtx src, dest;
  if (set)
    {
      src = SET_SRC (set);
      dest = SET_DEST (set);
      if (GET_CODE (src) == LO_SUM
          && symbolic_operand (XEXP (src, 1), VOIDmode)
          && ! read_only_operand (XEXP (src, 1), VOIDmode))
        priority >>= 3;

      else if (GET_CODE (src) == MEM
               && GET_CODE (XEXP (src, 0)) == LO_SUM
               && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
               && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
        priority >>= 1;

      else if (GET_CODE (dest) == MEM
               && GET_CODE (XEXP (dest, 0)) == LO_SUM
               && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
               && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
        priority >>= 3;
    }
  return priority;
}

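/* For example, a priority of 32 becomes 4 after the >>= 3 above, so a
   LO_SUM of a writable symbol sinks in the ready list and ends up
   scheduled close to the consumer of %r1.  */
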
/* The 700 can only issue a single insn at a time.
   The 7XXX processors can issue two insns at a time.
   The 8000 can issue 4 insns at a time.  */
static int
pa_issue_rate (void)
{
  switch (pa_cpu)
    {
    case PROCESSOR_700:         return 1;
    case PROCESSOR_7100:        return 2;
    case PROCESSOR_7100LC:      return 2;
    case PROCESSOR_7200:        return 2;
    case PROCESSOR_7300:        return 2;
    case PROCESSOR_8000:        return 4;

    default:
      gcc_unreachable ();
    }
}


/* Return any length adjustment needed by INSN which already has its length
   computed as LENGTH.  Return zero if no adjustment is necessary.

   For the PA: function calls, millicode calls, and backwards short
   conditional branches with unfilled delay slots need an adjustment by +1
   (to account for the NOP which will be inserted into the instruction stream).

   Also compute the length of an inline block move here as it is too
   complicated to express as a length attribute in pa.md.  */
int
pa_adjust_insn_length (rtx insn, int length)
{
  rtx pat = PATTERN (insn);

  /* Jumps inside switch tables which have unfilled delay slots need
     adjustment.  */
  if (GET_CODE (insn) == JUMP_INSN
      && GET_CODE (pat) == PARALLEL
      && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
    return 4;
  /* Millicode insn with an unfilled delay slot.  */
  else if (GET_CODE (insn) == INSN
           && GET_CODE (pat) != SEQUENCE
           && GET_CODE (pat) != USE
           && GET_CODE (pat) != CLOBBER
           && get_attr_type (insn) == TYPE_MILLI)
    return 4;
  /* Block move pattern.  */
  else if (GET_CODE (insn) == INSN
           && GET_CODE (pat) == PARALLEL
           && GET_CODE (XVECEXP (pat, 0, 0)) == SET
           && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
           && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
           && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
           && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
    return compute_movmem_length (insn) - 4;
  /* Block clear pattern.  */
  else if (GET_CODE (insn) == INSN
           && GET_CODE (pat) == PARALLEL
           && GET_CODE (XVECEXP (pat, 0, 0)) == SET
           && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
           && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
           && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
    return compute_clrmem_length (insn) - 4;
  /* Conditional branch with an unfilled delay slot.  */
  else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
    {
      /* Adjust a short backwards conditional with an unfilled delay slot.  */
      if (GET_CODE (pat) == SET
          && length == 4
          && ! forward_branch_p (insn))
        return 4;
      else if (GET_CODE (pat) == PARALLEL
               && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
               && length == 4)
        return 4;
      /* Adjust dbra insn with short backwards conditional branch with
         unfilled delay slot -- only for case where counter is in a
         general register.  */
      else if (GET_CODE (pat) == PARALLEL
               && GET_CODE (XVECEXP (pat, 0, 1)) == SET
               && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
               && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
               && length == 4
               && ! forward_branch_p (insn))
        return 4;
      else
        return 0;
    }
  return 0;
}

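/* For instance, a short backwards conditional branch whose delay slot
   could not be filled reports length 4 here and receives a +4 byte
   adjustment for the nop that will follow it in the output stream.  */
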
/* Print operand X (an rtx) in assembler syntax to file FILE.
   CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
   For `%' followed by punctuation, CODE is the punctuation and X is null.  */

void
print_operand (FILE *file, rtx x, int code)
{
  switch (code)
    {
    case '#':
      /* Output a 'nop' if there's nothing for the delay slot.  */
      if (dbr_sequence_length () == 0)
        fputs ("\n\tnop", file);
      return;
    case '*':
      /* Output a nullification completer if there's nothing for the
         delay slot or nullification is requested.  */
      if (dbr_sequence_length () == 0
          || (final_sequence
              && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
        fputs (",n", file);
      return;
    case 'R':
      /* Print out the second register name of a register pair.
         I.e., R (6) => 7.  */
      fputs (reg_names[REGNO (x) + 1], file);
      return;
    case 'r':
      /* A register or zero.  */
      if (x == const0_rtx
          || (x == CONST0_RTX (DFmode))
          || (x == CONST0_RTX (SFmode)))
        {
          fputs ("%r0", file);
          return;
        }
      else
        break;
    case 'f':
      /* A register or zero (floating point).  */
      if (x == const0_rtx
          || (x == CONST0_RTX (DFmode))
          || (x == CONST0_RTX (SFmode)))
        {
          fputs ("%fr0", file);
          return;
        }
      else
        break;
    case 'A':
      {
        rtx xoperands[2];

        xoperands[0] = XEXP (XEXP (x, 0), 0);
        xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
        output_global_address (file, xoperands[1], 0);
        fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
        return;
      }

    case 'C':                   /* Plain (C)ondition */
    case 'X':
      switch (GET_CODE (x))
        {
        case EQ:
          fputs ("=", file);  break;
        case NE:
          fputs ("<>", file);  break;
        case GT:
          fputs (">", file);  break;
        case GE:
          fputs (">=", file);  break;
        case GEU:
          fputs (">>=", file);  break;
        case GTU:
          fputs (">>", file);  break;
        case LT:
          fputs ("<", file);  break;
        case LE:
          fputs ("<=", file);  break;
        case LEU:
          fputs ("<<=", file);  break;
        case LTU:
          fputs ("<<", file);  break;
        default:
          gcc_unreachable ();
        }
      return;
    case 'N':                   /* Condition, (N)egated */
      switch (GET_CODE (x))
        {
        case EQ:
          fputs ("<>", file);  break;
        case NE:
          fputs ("=", file);  break;
        case GT:
          fputs ("<=", file);  break;
        case GE:
          fputs ("<", file);  break;
        case GEU:
          fputs ("<<", file);  break;
        case GTU:
          fputs ("<<=", file);  break;
        case LT:
          fputs (">=", file);  break;
        case LE:
          fputs (">", file);  break;
        case LEU:
          fputs (">>", file);  break;
        case LTU:
          fputs (">>=", file);  break;
        default:
          gcc_unreachable ();
        }
      return;
    /* For floating point comparisons.  Note that the output
       predicates are the complement of the desired mode.  The
       conditions for GT, GE, LT, LE and LTGT cause an invalid
       operation exception if the result is unordered and this
       exception is enabled in the floating-point status register.  */
    case 'Y':
      switch (GET_CODE (x))
        {
        case EQ:
          fputs ("!=", file);  break;
        case NE:
          fputs ("=", file);  break;
        case GT:
          fputs ("!>", file);  break;
        case GE:
          fputs ("!>=", file);  break;
        case LT:
          fputs ("!<", file);  break;
        case LE:
          fputs ("!<=", file);  break;
        case LTGT:
          fputs ("!<>", file);  break;
        case UNLE:
          fputs ("!?<=", file);  break;
        case UNLT:
          fputs ("!?<", file);  break;
        case UNGE:
          fputs ("!?>=", file);  break;
        case UNGT:
          fputs ("!?>", file);  break;
        case UNEQ:
          fputs ("!?=", file);  break;
        case UNORDERED:
          fputs ("!?", file);  break;
        case ORDERED:
          fputs ("?", file);  break;
        default:
          gcc_unreachable ();
        }
      return;
    case 'S':                   /* Condition, operands are (S)wapped.  */
      switch (GET_CODE (x))
        {
        case EQ:
          fputs ("=", file);  break;
        case NE:
          fputs ("<>", file);  break;
        case GT:
          fputs ("<", file);  break;
        case GE:
          fputs ("<=", file);  break;
        case GEU:
          fputs ("<<=", file);  break;
        case GTU:
          fputs ("<<", file);  break;
        case LT:
          fputs (">", file);  break;
        case LE:
          fputs (">=", file);  break;
        case LEU:
          fputs (">>=", file);  break;
        case LTU:
          fputs (">>", file);  break;
        default:
          gcc_unreachable ();
        }
      return;
    case 'B':                   /* Condition, (B)oth swapped and negate.  */
      switch (GET_CODE (x))
        {
        case EQ:
          fputs ("<>", file);  break;
        case NE:
          fputs ("=", file);  break;
        case GT:
          fputs (">=", file);  break;
        case GE:
          fputs (">", file);  break;
        case GEU:
          fputs (">>", file);  break;
        case GTU:
          fputs (">>=", file);  break;
        case LT:
          fputs ("<=", file);  break;
        case LE:
          fputs ("<", file);  break;
        case LEU:
          fputs ("<<", file);  break;
        case LTU:
          fputs ("<<=", file);  break;
        default:
          gcc_unreachable ();
        }
      return;
    case 'k':
      gcc_assert (GET_CODE (x) == CONST_INT);
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
      return;
    case 'Q':
      gcc_assert (GET_CODE (x) == CONST_INT);
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
      return;
    case 'L':
      gcc_assert (GET_CODE (x) == CONST_INT);
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
      return;
    case 'O':
      gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
      fprintf (file, "%d", exact_log2 (INTVAL (x)));
      return;
    case 'p':
      gcc_assert (GET_CODE (x) == CONST_INT);
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
      return;
    case 'P':
      gcc_assert (GET_CODE (x) == CONST_INT);
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
      return;
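    /* Examples of the CONST_INT codes above, with INTVAL (x) == 8:
       'k' prints -9 (~8), 'Q' prints 56 (64 - 8), 'L' prints 24
       (32 - 8), 'p' prints 55 (63 - 8) and 'P' prints 23 (31 - 8).  */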
    case 'I':
      if (GET_CODE (x) == CONST_INT)
        fputs ("i", file);
      return;
    case 'M':
    case 'F':
      switch (GET_CODE (XEXP (x, 0)))
        {
        case PRE_DEC:
        case PRE_INC:
          if (ASSEMBLER_DIALECT == 0)
            fputs ("s,mb", file);
          else
            fputs (",mb", file);
          break;
        case POST_DEC:
        case POST_INC:
          if (ASSEMBLER_DIALECT == 0)
            fputs ("s,ma", file);
          else
            fputs (",ma", file);
          break;
        case PLUS:
          if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
              && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
            {
              if (ASSEMBLER_DIALECT == 0)
                fputs ("x", file);
            }
          else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
                   || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
            {
              if (ASSEMBLER_DIALECT == 0)
                fputs ("x,s", file);
              else
                fputs (",s", file);
            }
          else if (code == 'F' && ASSEMBLER_DIALECT == 0)
            fputs ("s", file);
          break;
        default:
          if (code == 'F' && ASSEMBLER_DIALECT == 0)
            fputs ("s", file);
          break;
        }
      return;
    case 'G':
      output_global_address (file, x, 0);
      return;
    case 'H':
      output_global_address (file, x, 1);
      return;
    case 0:                     /* Don't do anything special */
      break;
    case 'Z':
      {
        unsigned op[3];
        compute_zdepwi_operands (INTVAL (x), op);
        fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
        return;
      }
    case 'z':
      {
        unsigned op[3];
        compute_zdepdi_operands (INTVAL (x), op);
        fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
        return;
      }
    case 'c':
      /* We can get here from a .vtable_inherit due to our
         CONSTANT_ADDRESS_P rejecting perfectly good constant
         addresses.  */
      break;
    default:
      gcc_unreachable ();
    }
  if (GET_CODE (x) == REG)
    {
      fputs (reg_names [REGNO (x)], file);
      if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
        {
          fputs ("R", file);
          return;
        }
      if (FP_REG_P (x)
          && GET_MODE_SIZE (GET_MODE (x)) <= 4
          && (REGNO (x) & 1) == 0)
        fputs ("L", file);
    }
  else if (GET_CODE (x) == MEM)
    {
      int size = GET_MODE_SIZE (GET_MODE (x));
      rtx base = NULL_RTX;
      switch (GET_CODE (XEXP (x, 0)))
        {
        case PRE_DEC:
        case POST_DEC:
          base = XEXP (XEXP (x, 0), 0);
          fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
          break;
        case PRE_INC:
        case POST_INC:
          base = XEXP (XEXP (x, 0), 0);
          fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
          break;
        case PLUS:
          if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
            fprintf (file, "%s(%s)",
                     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
                     reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
          else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
            fprintf (file, "%s(%s)",
                     reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
                     reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
          else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
            {
              /* Because the REG_POINTER flag can get lost during reload,
                 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
                 index and base registers in the combined move patterns.  */
              rtx base = XEXP (XEXP (x, 0), 1);
              rtx index = XEXP (XEXP (x, 0), 0);

              fprintf (file, "%s(%s)",
                       reg_names [REGNO (index)], reg_names [REGNO (base)]);
            }
          else
            output_address (XEXP (x, 0));
          break;
        default:
          output_address (XEXP (x, 0));
          break;
        }
    }
  else
    output_addr_const (file, x);
}

/* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF.  */

void
output_global_address (FILE *file, rtx x, int round_constant)
{

  /* Imagine (high (const (plus ...))).  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
    output_addr_const (file, x);
  else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
    {
      output_addr_const (file, x);
      fputs ("-$global$", file);
    }
  else if (GET_CODE (x) == CONST)
    {
      const char *sep = "";
      int offset = 0;           /* assembler wants -$global$ at end */
      rtx base = NULL_RTX;

      switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
        {
        case SYMBOL_REF:
          base = XEXP (XEXP (x, 0), 0);
          output_addr_const (file, base);
          break;
        case CONST_INT:
          offset = INTVAL (XEXP (XEXP (x, 0), 0));
          break;
        default:
          gcc_unreachable ();
        }

      switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
        {
        case SYMBOL_REF:
          base = XEXP (XEXP (x, 0), 1);
          output_addr_const (file, base);
          break;
        case CONST_INT:
          offset = INTVAL (XEXP (XEXP (x, 0), 1));
          break;
        default:
          gcc_unreachable ();
        }

      /* How bogus.  The compiler is apparently responsible for
         rounding the constant if it uses an LR field selector.

         The linker and/or assembler seem a better place since
         they have to do this kind of thing already.

         If we fail to do this, HP's optimizing linker may eliminate
         an addil, but not update the ldw/stw/ldo instruction that
         uses the result of the addil.  */
      if (round_constant)
        offset = ((offset + 0x1000) & ~0x1fff);

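      /* For example (illustrative offset): with round_constant set, an
         offset of 0x2345 becomes (0x2345 + 0x1000) & ~0x1fff = 0x2000,
         the value the LR field selector rounding would produce.  */
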
      switch (GET_CODE (XEXP (x, 0)))
        {
        case PLUS:
          if (offset < 0)
            {
              offset = -offset;
              sep = "-";
            }
          else
            sep = "+";
          break;

        case MINUS:
          gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
          sep = "-";
          break;

        default:
          gcc_unreachable ();
        }

      if (!read_only_operand (base, VOIDmode) && !flag_pic)
        fputs ("-$global$", file);
      if (offset)
        fprintf (file, "%s%d", sep, offset);
    }
  else
    output_addr_const (file, x);
}

/* Output boilerplate text to appear at the beginning of the file.
   There are several possible versions.  */
#define aputs(x) fputs(x, asm_out_file)
static inline void
pa_file_start_level (void)
{
  if (TARGET_64BIT)
    aputs ("\t.LEVEL 2.0w\n");
  else if (TARGET_PA_20)
    aputs ("\t.LEVEL 2.0\n");
  else if (TARGET_PA_11)
    aputs ("\t.LEVEL 1.1\n");
  else
    aputs ("\t.LEVEL 1.0\n");
}

static inline void
pa_file_start_space (int sortspace)
{
  aputs ("\t.SPACE $PRIVATE$");
  if (sortspace)
    aputs (",SORT=16");
  aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
         "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
         "\n\t.SPACE $TEXT$");
  if (sortspace)
    aputs (",SORT=8");
  aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
         "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
}

static inline void
pa_file_start_file (int want_version)
{
  if (write_symbols != NO_DEBUG)
    {
      output_file_directive (asm_out_file, main_input_filename);
      if (want_version)
        aputs ("\t.version\t\"01.01\"\n");
    }
}

static inline void
pa_file_start_mcount (const char *aswhat)
{
  if (profile_flag)
    fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
}

static void
pa_elf_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_mcount ("ENTRY");
  pa_file_start_file (0);
}

static void
pa_som_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_space (0);
  aputs ("\t.IMPORT $global$,DATA\n"
         "\t.IMPORT $$dyncall,MILLICODE\n");
  pa_file_start_mcount ("CODE");
  pa_file_start_file (0);
}

static void
pa_linux_file_start (void)
{
  pa_file_start_file (1);
  pa_file_start_level ();
  pa_file_start_mcount ("CODE");
}

static void
pa_hpux64_gas_file_start (void)
{
  pa_file_start_level ();
#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
  if (profile_flag)
    ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
#endif
  pa_file_start_file (1);
}

static void
pa_hpux64_hpas_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_space (1);
  pa_file_start_mcount ("CODE");
  pa_file_start_file (0);
}
#undef aputs

/* Search the deferred plabel list for SYMBOL and return its internal
   label.  If an entry for SYMBOL is not found, a new entry is created.  */

rtx
get_deferred_plabel (rtx symbol)
{
  const char *fname = XSTR (symbol, 0);
  size_t i;

  /* See if we have already put this function on the list of deferred
     plabels.  This list is generally small, so a linear search is not
     too ugly.  If it proves too slow, replace it with something faster.  */
  for (i = 0; i < n_deferred_plabels; i++)
    if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
      break;

  /* If the deferred plabel list is empty, or this entry was not found
     on the list, create a new entry on the list.  */
  if (deferred_plabels == NULL || i == n_deferred_plabels)
    {
      tree id;

      if (deferred_plabels == 0)
        deferred_plabels = (struct deferred_plabel *)
          ggc_alloc (sizeof (struct deferred_plabel));
      else
        deferred_plabels = (struct deferred_plabel *)
          ggc_realloc (deferred_plabels,
                       ((n_deferred_plabels + 1)
                        * sizeof (struct deferred_plabel)));

      i = n_deferred_plabels++;
      deferred_plabels[i].internal_label = gen_label_rtx ();
      deferred_plabels[i].symbol = symbol;

      /* Gross.  We have just implicitly taken the address of this
         function.  Mark it in the same manner as assemble_name.  */
      id = maybe_get_identifier (targetm.strip_name_encoding (fname));
      if (id)
        mark_referenced (id);
    }

  return deferred_plabels[i].internal_label;
}

static void
output_deferred_plabels (void)
{
  size_t i;

  /* If we have some deferred plabels, then we need to switch into the
     data or readonly data section, and align it to a 4 byte boundary
     before outputting the deferred plabels.  */
  if (n_deferred_plabels)
    {
      switch_to_section (flag_pic ? data_section : readonly_data_section);
      ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
    }

  /* Now output the deferred plabels.  */
  for (i = 0; i < n_deferred_plabels; i++)
    {
      (*targetm.asm_out.internal_label) (asm_out_file, "L",
                 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
      assemble_integer (deferred_plabels[i].symbol,
                        TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
    }
}

#ifdef HPUX_LONG_DOUBLE_LIBRARY
/* Initialize optabs to point to HPUX long double emulation routines.  */
static void
pa_hpux_init_libfuncs (void)
{
  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
  set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
  set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");

  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
  set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");

  set_conv_libfunc (sext_optab,   TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
  set_conv_libfunc (sext_optab,   TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
  set_conv_libfunc (trunc_optab,  SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
  set_conv_libfunc (trunc_optab,  DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");

  set_conv_libfunc (sfix_optab,   SImode, TFmode, TARGET_64BIT
                                                  ? "__U_Qfcnvfxt_quad_to_sgl"
                                                  : "_U_Qfcnvfxt_quad_to_sgl");
  set_conv_libfunc (sfix_optab,   DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
  set_conv_libfunc (ufix_optab,   SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl");
  set_conv_libfunc (ufix_optab,   DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl");

  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
  set_conv_libfunc (ufloat_optab, TFmode, SImode, "_U_Qfcnvxf_usgl_to_quad");
  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxf_udbl_to_quad");
}
#endif

/* HP's millicode routines mean something special to the assembler.
   Keep track of which ones we have used.  */

enum millicodes { remI, remU, divI, divU, mulI, end1000 };
static void import_milli (enum millicodes);
static char imported[(int) end1000];
static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
static const char import_string[] = ".IMPORT $$....,MILLICODE";
#define MILLI_START 10

static void
import_milli (enum millicodes code)
{
  char str[sizeof (import_string)];

  if (!imported[(int) code])
    {
      imported[(int) code] = 1;
      strcpy (str, import_string);
      strncpy (str + MILLI_START, milli_names[(int) code], 4);
      output_asm_insn (str, 0);
    }
}

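/* For example, import_milli (mulI) patches the template above at
   MILLI_START and emits ".IMPORT $$mulI,MILLICODE" exactly once per
   translation unit.  */
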
/* The register constraints have put the operands and return value in
   the proper registers.  */

const char *
output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
{
  import_milli (mulI);
  return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
}

/* Emit the rtl for doing a division by a constant.  */

/* Do magic division millicodes exist for this value? */
const int magic_milli[] = {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};

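/* Reading the table above: magic_milli[d] is nonzero exactly for
   d = 3, 5, 6, 7, 9, 10, 12, 14 and 15, the divisors for which
   dedicated $$divI_<d>/$$divU_<d> millicode entry points exist.  */
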
/* We'll use an array to keep track of the magic millicodes and
   whether or not we've used them already. [n][0] is signed, [n][1] is
   unsigned.  */

static int div_milli[16][2];

int
emit_hpdiv_const (rtx *operands, int unsignedp)
{
  if (GET_CODE (operands[2]) == CONST_INT
      && INTVAL (operands[2]) > 0
      && INTVAL (operands[2]) < 16
      && magic_milli[INTVAL (operands[2])])
    {
      rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);

      emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
      emit
        (gen_rtx_PARALLEL
         (VOIDmode,
          gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
                                     gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
                                                     SImode,
                                                     gen_rtx_REG (SImode, 26),
                                                     operands[2])),
                     gen_rtx_CLOBBER (VOIDmode, operands[4]),
                     gen_rtx_CLOBBER (VOIDmode, operands[3]),
                     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
                     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
                     gen_rtx_CLOBBER (VOIDmode, ret))));
      emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
      return 1;
    }
  return 0;
}

const char *
output_div_insn (rtx *operands, int unsignedp, rtx insn)
{
  int divisor;

  /* If the divisor is a constant, try to use one of the special
     opcodes.  */
  if (GET_CODE (operands[0]) == CONST_INT)
    {
      static char buf[100];
      divisor = INTVAL (operands[0]);
      if (!div_milli[divisor][unsignedp])
        {
          div_milli[divisor][unsignedp] = 1;
          if (unsignedp)
            output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
          else
            output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
        }
      if (unsignedp)
        {
          sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
                   INTVAL (operands[0]));
          return output_millicode_call (insn,
                                        gen_rtx_SYMBOL_REF (SImode, buf));
        }
      else
        {
          sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
                   INTVAL (operands[0]));
          return output_millicode_call (insn,
                                        gen_rtx_SYMBOL_REF (SImode, buf));
        }
    }
  /* Divisor isn't a special constant.  */
  else
    {
      if (unsignedp)
        {
          import_milli (divU);
          return output_millicode_call (insn,
                                        gen_rtx_SYMBOL_REF (SImode, "$$divU"));
        }
      else
        {
          import_milli (divI);
          return output_millicode_call (insn,
                                        gen_rtx_SYMBOL_REF (SImode, "$$divI"));
        }
    }
}

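/* For instance, an unsigned division by the constant 7 imports and then
   calls "$$divU_7" through the CONST_INT path above; a divisor that is
   not one of the special constants goes through the generic "$$divU"
   or "$$divI" millicode instead.  */
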
/* Output a $$rem millicode to do mod.  */

const char *
output_mod_insn (int unsignedp, rtx insn)
{
  if (unsignedp)
    {
      import_milli (remU);
      return output_millicode_call (insn,
                                    gen_rtx_SYMBOL_REF (SImode, "$$remU"));
    }
  else
    {
      import_milli (remI);
      return output_millicode_call (insn,
                                    gen_rtx_SYMBOL_REF (SImode, "$$remI"));
    }
}

void
output_arg_descriptor (rtx call_insn)
{
  const char *arg_regs[4];
  enum machine_mode arg_mode;
  rtx link;
  int i, output_flag = 0;
  int regno;

  /* We neither need nor want argument location descriptors for the
     64bit runtime environment or the ELF32 environment.  */
  if (TARGET_64BIT || TARGET_ELF32)
    return;

  for (i = 0; i < 4; i++)
    arg_regs[i] = 0;

  /* Specify explicitly that no argument relocations should take place
     if using the portable runtime calling conventions.  */
  if (TARGET_PORTABLE_RUNTIME)
    {
      fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
             asm_out_file);
      return;
    }

  gcc_assert (GET_CODE (call_insn) == CALL_INSN);
  for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
       link; link = XEXP (link, 1))
    {
      rtx use = XEXP (link, 0);

      if (! (GET_CODE (use) == USE
             && GET_CODE (XEXP (use, 0)) == REG
             && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
        continue;

      arg_mode = GET_MODE (XEXP (use, 0));
      regno = REGNO (XEXP (use, 0));
      if (regno >= 23 && regno <= 26)
        {
          arg_regs[26 - regno] = "GR";
          if (arg_mode == DImode)
            arg_regs[25 - regno] = "GR";
        }
      else if (regno >= 32 && regno <= 39)
        {
          if (arg_mode == SFmode)
            arg_regs[(regno - 32) / 2] = "FR";
          else
            {
#ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
              arg_regs[(regno - 34) / 2] = "FR";
              arg_regs[(regno - 34) / 2 + 1] = "FU";
#else
              arg_regs[(regno - 34) / 2] = "FU";
              arg_regs[(regno - 34) / 2 + 1] = "FR";
#endif
            }
        }
    }
  fputs ("\t.CALL ", asm_out_file);
  for (i = 0; i < 4; i++)
    {
      if (arg_regs[i])
        {
          if (output_flag++)
            fputc (',', asm_out_file);
          fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
        }
    }
  fputc ('\n', asm_out_file);
}

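/* For example, a call such as f (1, 2) (hypothetical) with two int
   arguments in %r26 and %r25 makes the loop above emit
   "\t.CALL ARGW0=GR,ARGW1=GR".  */
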
static enum reg_class
pa_secondary_reload (bool in_p, rtx x, enum reg_class class,
                     enum machine_mode mode, secondary_reload_info *sri)
{
  int is_symbolic, regno;

  /* Handle the easy stuff first.  */
  if (class == R1_REGS)
    return NO_REGS;

  if (REG_P (x))
    {
      regno = REGNO (x);
      if (class == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
        return NO_REGS;
    }
  else
    regno = -1;

  /* If we have something like (mem (mem (...))), we can safely assume the
     inner MEM will end up in a general register after reloading, so there's
     no need for a secondary reload.  */
  if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
    return NO_REGS;

  /* Trying to load a constant into a FP register during PIC code
     generation requires %r1 as a scratch register.  */
  if (flag_pic
      && (mode == SImode || mode == DImode)
      && FP_REG_CLASS_P (class)
      && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
    {
      sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
                    : CODE_FOR_reload_indi_r1);
      return NO_REGS;
    }

  /* Profiling showed the PA port spends about 1.3% of its compilation
     time in true_regnum from calls inside pa_secondary_reload_class.  */
  if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
    regno = true_regnum (x);

  /* Handle out of range displacement for integer mode loads/stores of
     FP registers.  */
  if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
       && GET_MODE_CLASS (mode) == MODE_INT
       && FP_REG_CLASS_P (class))
      || (class == SHIFT_REGS && (regno <= 0 || regno >= 32)))
    {
      sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
      return NO_REGS;
    }

  /* A SAR<->FP register copy requires a secondary register (GPR) as
     well as secondary memory.  */
  if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
      && ((REGNO_REG_CLASS (regno) == SHIFT_REGS && FP_REG_CLASS_P (class))
          || (class == SHIFT_REGS
              && FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))))
    {
      sri->icode = in_p ? reload_in_optab[mode] : reload_out_optab[mode];
      return NO_REGS;
    }

  /* Secondary reloads of symbolic operands require %r1 as a scratch
     register when we're generating PIC code and the operand isn't
     readonly.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  /* Profiling has shown GCC spends about 2.6% of its compilation
     time in symbolic_operand from calls inside pa_secondary_reload_class.
     So, we use an inline copy to avoid useless work.  */
  switch (GET_CODE (x))
    {
      rtx op;

      case SYMBOL_REF:
        is_symbolic = !SYMBOL_REF_TLS_MODEL (x);
        break;
      case LABEL_REF:
        is_symbolic = 1;
        break;
      case CONST:
        op = XEXP (x, 0);
        is_symbolic = (((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
                         && !SYMBOL_REF_TLS_MODEL (XEXP (op, 0)))
                        || GET_CODE (XEXP (op, 0)) == LABEL_REF)
                       && GET_CODE (XEXP (op, 1)) == CONST_INT);
        break;
      default:
        is_symbolic = 0;
        break;
    }

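  /* For instance, (const (plus (symbol_ref "x") (const_int 8))) sets
     is_symbolic above, whereas a TLS SYMBOL_REF leaves it clear and
     needs no %r1 scratch.  */
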
  if (is_symbolic && (flag_pic || !read_only_operand (x, VOIDmode)))
    {
      gcc_assert (mode == SImode || mode == DImode);
      sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
                    : CODE_FOR_reload_indi_r1);
    }

  return NO_REGS;
}

/* In the 32-bit runtime, arguments larger than eight bytes are passed
   by invisible reference.  As a GCC extension, we also pass anything
   with a zero or variable size by reference.

   The 64-bit runtime does not describe passing any types by invisible
   reference.  The internals of GCC can't currently handle passing
   empty structures, and zero or variable length arrays when they are
   not passed entirely on the stack or by reference.  Thus, as a GCC
   extension, we pass these types by reference.  The HP compiler doesn't
   support these types, so hopefully there shouldn't be any compatibility
   issues.  This may have to be revisited when HP releases a C99 compiler
   or updates the ABI.  */

static bool
pa_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
                      enum machine_mode mode, tree type,
                      bool named ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;

  if (type)
    size = int_size_in_bytes (type);
  else
    size = GET_MODE_SIZE (mode);

  if (TARGET_64BIT)
    return size <= 0;
  else
    return size <= 0 || size > 8;
}

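/* Concretely, under the 32-bit runtime a 12-byte struct argument is
   passed by invisible reference (size > 8), while an 8-byte struct is
   still passed by value.  */
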
enum direction
function_arg_padding (enum machine_mode mode, tree type)
{
  if (mode == BLKmode
      || (TARGET_64BIT && type && AGGREGATE_TYPE_P (type)))
    {
      /* Return none if justification is not required.  */
      if (type
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
        return none;

      /* The directions set here are ignored when a BLKmode argument larger
         than a word is placed in a register.  Different code is used for
         the stack and registers.  This makes it difficult to have a
         consistent data representation for both the stack and registers.
         For both runtimes, the justification and padding for arguments on
         the stack and in registers should be identical.  */
      if (TARGET_64BIT)
        /* The 64-bit runtime specifies left justification for aggregates.  */
        return upward;
      else
        /* The 32-bit runtime architecture specifies right justification.
           When the argument is passed on the stack, the argument is padded
           with garbage on the left.  The HP compiler pads with zeros.  */
        return downward;
    }

  if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
    return downward;
  else
    return none;
}


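/* As an example, under the 32-bit runtime a 3-byte BLKmode argument is
   padded downward: it sits right-justified in its word with garbage in
   the upper bytes, matching the stack convention described above.  */
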
/* Do what is necessary for `va_start'.  We look at the current function
   to determine if stdargs or varargs is used and fill in an initial
   va_list.  A pointer to this constructor is returned.  */

static rtx
hppa_builtin_saveregs (void)
{
  rtx offset, dest;
  tree fntype = TREE_TYPE (current_function_decl);
  int argadj = ((!(TYPE_ARG_TYPES (fntype) != 0
                   && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
                       != void_type_node)))
                ? UNITS_PER_WORD : 0);

  if (argadj)
    offset = plus_constant (current_function_arg_offset_rtx, argadj);
  else
    offset = current_function_arg_offset_rtx;

  if (TARGET_64BIT)
    {
      int i, off;

      /* Adjust for varargs/stdarg differences.  */
      if (argadj)
        offset = plus_constant (current_function_arg_offset_rtx, -argadj);
      else
        offset = current_function_arg_offset_rtx;

      /* We need to save %r26 .. %r19 inclusive starting at offset -64
         from the incoming arg pointer and growing to larger addresses.  */
      for (i = 26, off = -64; i >= 19; i--, off += 8)
        emit_move_insn (gen_rtx_MEM (word_mode,
                                     plus_constant (arg_pointer_rtx, off)),
                        gen_rtx_REG (word_mode, i));

      /* The incoming args pointer points just beyond the flushback area;
         normally this is not a serious concern.  However, when we are doing
         varargs/stdargs we want to make the arg pointer point to the start
         of the incoming argument area.  */
      emit_move_insn (virtual_incoming_args_rtx,
                      plus_constant (arg_pointer_rtx, -64));

      /* Now return a pointer to the first anonymous argument.  */
      return copy_to_reg (expand_binop (Pmode, add_optab,
                                        virtual_incoming_args_rtx,
                                        offset, 0, 0, OPTAB_LIB_WIDEN));
    }

  /* Store general registers on the stack.  */
  dest = gen_rtx_MEM (BLKmode,
                      plus_constant (current_function_internal_arg_pointer,
                                     -16));
  set_mem_alias_set (dest, get_varargs_alias_set ());
  set_mem_align (dest, BITS_PER_WORD);
  move_block_from_reg (23, dest, 4);

  /* move_block_from_reg will emit code to store the argument registers
     individually as scalar stores.

     However, other insns may later load from the same addresses for
     a structure load (passing a struct to a varargs routine).

     The alias code assumes that such aliasing can never happen, so we
     have to keep memory referencing insns from moving up beyond the
     last argument register store.  So we emit a blockage insn here.  */
  emit_insn (gen_blockage ());

  return copy_to_reg (expand_binop (Pmode, add_optab,
                                    current_function_internal_arg_pointer,
                                    offset, 0, 0, OPTAB_LIB_WIDEN));
}

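/* Sketch of the 64-bit layout produced above (offsets relative to the
   incoming arg pointer): %r26 is stored at -64, %r25 at -56, and so on
   through %r19 at -8, so the flushback area holds all eight potential
   argument words contiguously.  */
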
void
hppa_va_start (tree valist, rtx nextarg)
{
  nextarg = expand_builtin_saveregs ();
  std_expand_builtin_va_start (valist, nextarg);
}

static tree
5865
hppa_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p, tree *post_p)
5866
{
5867
  if (TARGET_64BIT)
5868
    {
5869
      /* Args grow upward.  We can use the generic routines.  */
5870
      return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5871
    }
5872
  else /* !TARGET_64BIT */
5873
    {
5874
      tree ptr = build_pointer_type (type);
5875
      tree valist_type;
5876
      tree t, u;
5877
      unsigned int size, ofs;
5878
      bool indirect;
5879
 
5880
      indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
5881
      if (indirect)
5882
        {
5883
          type = ptr;
5884
          ptr = build_pointer_type (type);
5885
        }
5886
      size = int_size_in_bytes (type);
5887
      valist_type = TREE_TYPE (valist);
5888
 
5889
      /* Args grow down.  Not handled by generic routines.  */
5890
 
5891
      u = fold_convert (valist_type, size_in_bytes (type));
5892
      t = build2 (MINUS_EXPR, valist_type, valist, u);
5893
 
5894
      /* Copied from va-pa.h, but we probably don't need to align to
5895
         word size, since we generate and preserve that invariant.  */
5896
      u = build_int_cst (valist_type, (size > 4 ? -8 : -4));
5897
      t = build2 (BIT_AND_EXPR, valist_type, t, u);
5898
 
5899
      t = build2 (MODIFY_EXPR, valist_type, valist, t);
5900
 
5901
      ofs = (8 - size) % 4;
5902
      if (ofs != 0)
5903
        {
5904
          u = fold_convert (valist_type, size_int (ofs));
5905
          t = build2 (PLUS_EXPR, valist_type, t, u);
5906
        }
5907
 
5908
      t = fold_convert (ptr, t);
5909
      t = build_va_arg_indirect_ref (t);
5910
 
5911
      if (indirect)
5912
        t = build_va_arg_indirect_ref (t);
5913
 
5914
      return t;
5915
    }
5916
}
5917
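/* A minimal sketch, not part of this file: the !TARGET_64BIT pointer
   arithmetic built above, restated as plain C.  SIZE is the argument
   size in bytes; arguments grow downward, so the arg pointer is stepped
   down and then aligned (8 bytes for arguments wider than a word, else
   4), and small arguments are right-justified within their word.  The
   function name is hypothetical.  */
#if 0
static char *
hppa32_next_arg (char **valist, unsigned int size)
{
  unsigned long p = (unsigned long) *valist - size;

  p &= size > 4 ? ~7UL : ~3UL;      /* the BIT_AND_EXPR with -8 or -4 */
  *valist = (char *) p;             /* the MODIFY_EXPR of VALIST */

  return *valist + (8 - size) % 4;  /* the OFS adjustment */
}
#endif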
 
5918
/* True if MODE is valid for the target.  By "valid", we mean able to
5919
   be manipulated in non-trivial ways.  In particular, this means all
5920
   the arithmetic is supported.
5921
 
5922
   Currently, TImode is not valid as the HP 64-bit runtime documentation
5923
   doesn't document the alignment and calling conventions for this type.
5924
   Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
5925
   2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE.  */
5926
 
5927
static bool
5928
pa_scalar_mode_supported_p (enum machine_mode mode)
5929
{
5930
  int precision = GET_MODE_PRECISION (mode);
5931
 
5932
  switch (GET_MODE_CLASS (mode))
5933
    {
5934
    case MODE_PARTIAL_INT:
5935
    case MODE_INT:
5936
      if (precision == CHAR_TYPE_SIZE)
5937
        return true;
5938
      if (precision == SHORT_TYPE_SIZE)
5939
        return true;
5940
      if (precision == INT_TYPE_SIZE)
5941
        return true;
5942
      if (precision == LONG_TYPE_SIZE)
5943
        return true;
5944
      if (precision == LONG_LONG_TYPE_SIZE)
5945
        return true;
5946
      return false;
5947
 
5948
    case MODE_FLOAT:
5949
      if (precision == FLOAT_TYPE_SIZE)
5950
        return true;
5951
      if (precision == DOUBLE_TYPE_SIZE)
5952
        return true;
5953
      if (precision == LONG_DOUBLE_TYPE_SIZE)
5954
        return true;
5955
      return false;
5956
 
5957
    case MODE_DECIMAL_FLOAT:
5958
      return false;
5959
 
5960
    default:
5961
      gcc_unreachable ();
5962
    }
5963
}
5964
 
5965
/* This routine handles all the normal conditional branch sequences we
5966
   might need to generate.  It handles compare immediate vs compare
5967
   register, nullification of delay slots, varying length branches,
5968
   negated branches, and all combinations of the above.  It returns the
5969
   output appropriate to emit the branch corresponding to all given
5970
   parameters.  */
5971
 
5972
const char *
5973
output_cbranch (rtx *operands, int negated, rtx insn)
5974
{
5975
  static char buf[100];
5976
  int useskip = 0;
5977
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
5978
  int length = get_attr_length (insn);
5979
  int xdelay;
5980
 
5981
  /* A conditional branch to the following instruction (e.g. the delay slot)
5982
     is asking for a disaster.  This can happen when not optimizing and
5983
     when jump optimization fails.
5984
 
5985
     While it is usually safe to emit nothing, this can fail if the
5986
     preceding instruction is a nullified branch with an empty delay
5987
     slot and the same branch target as this branch.  We could check
5988
     for this but jump optimization should eliminate nop jumps.  It
5989
     is always safe to emit a nop.  */
5990
  if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
5991
    return "nop";
5992
 
5993
  /* The doubleword form of the cmpib instruction doesn't have the LEU
5994
     and GTU conditions while the cmpb instruction does.  Since we accept
5995
     zero for cmpb, we must ensure that we use cmpb for the comparison.  */
5996
  if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
5997
    operands[2] = gen_rtx_REG (DImode, 0);
5998
  if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
5999
    operands[1] = gen_rtx_REG (DImode, 0);
6000
 
6001
  /* If this is a long branch with its delay slot unfilled, set `nullify'
6002
     as it can nullify the delay slot and save a nop.  */
6003
  if (length == 8 && dbr_sequence_length () == 0)
6004
    nullify = 1;
6005
 
6006
  /* If this is a short forward conditional branch which did not get
6007
     its delay slot filled, the delay slot can still be nullified.  */
6008
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
6009
    nullify = forward_branch_p (insn);
6010
 
6011
  /* A forward branch over a single nullified insn can be done with a
6012
     comclr instruction.  This avoids a single cycle penalty due to a
6013
     mis-predicted branch if we fall through (branch not taken).  */
6014
  if (length == 4
6015
      && next_real_insn (insn) != 0
6016
      && get_attr_length (next_real_insn (insn)) == 4
6017
      && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6018
      && nullify)
6019
    useskip = 1;
6020
 
6021
  switch (length)
6022
    {
6023
      /* All short conditional branches except backwards with an unfilled
6024
         delay slot.  */
6025
      case 4:
6026
        if (useskip)
6027
          strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6028
        else
6029
          strcpy (buf, "{com%I2b,|cmp%I2b,}");
6030
        if (GET_MODE (operands[1]) == DImode)
6031
          strcat (buf, "*");
6032
        if (negated)
6033
          strcat (buf, "%B3");
6034
        else
6035
          strcat (buf, "%S3");
6036
        if (useskip)
6037
          strcat (buf, " %2,%r1,%%r0");
6038
        else if (nullify)
6039
          strcat (buf, ",n %2,%r1,%0");
6040
        else
6041
          strcat (buf, " %2,%r1,%0");
6042
        break;
6043
 
6044
     /* All long conditionals.  Note a short backward branch with an
6045
        unfilled delay slot is treated just like a long backward branch
6046
        with an unfilled delay slot.  */
6047
      case 8:
6048
        /* Handle weird backwards branch with a filled delay slot
6049
           which is nullified.  */
6050
        if (dbr_sequence_length () != 0
6051
            && ! forward_branch_p (insn)
6052
            && nullify)
6053
          {
6054
            strcpy (buf, "{com%I2b,|cmp%I2b,}");
6055
            if (GET_MODE (operands[1]) == DImode)
6056
              strcat (buf, "*");
6057
            if (negated)
6058
              strcat (buf, "%S3");
6059
            else
6060
              strcat (buf, "%B3");
6061
            strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6062
          }
6063
        /* Handle short backwards branch with an unfilled delay slot.
6064
           Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6065
           taken and untaken branches.  */
6066
        else if (dbr_sequence_length () == 0
6067
                 && ! forward_branch_p (insn)
6068
                 && INSN_ADDRESSES_SET_P ()
6069
                 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6070
                                    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6071
          {
6072
            strcpy (buf, "{com%I2b,|cmp%I2b,}");
6073
            if (GET_MODE (operands[1]) == DImode)
6074
              strcat (buf, "*");
6075
            if (negated)
6076
              strcat (buf, "%B3 %2,%r1,%0%#");
6077
            else
6078
              strcat (buf, "%S3 %2,%r1,%0%#");
6079
          }
6080
        else
6081
          {
6082
            strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6083
            if (GET_MODE (operands[1]) == DImode)
6084
              strcat (buf, "*");
6085
            if (negated)
6086
              strcat (buf, "%S3");
6087
            else
6088
              strcat (buf, "%B3");
6089
            if (nullify)
6090
              strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6091
            else
6092
              strcat (buf, " %2,%r1,%%r0\n\tb %0");
6093
          }
6094
        break;
6095
 
6096
      default:
6097
        /* The reversed conditional branch must branch over one additional
6098
           instruction if the delay slot is filled and needs to be extracted
6099
           by output_lbranch.  If the delay slot is empty or this is a
6100
           nullified forward branch, the instruction after the reversed
6101
           conditional branch must be nullified.  */
6102
        if (dbr_sequence_length () == 0
6103
            || (nullify && forward_branch_p (insn)))
6104
          {
6105
            nullify = 1;
6106
            xdelay = 0;
6107
            operands[4] = GEN_INT (length);
6108
          }
6109
        else
6110
          {
6111
            xdelay = 1;
6112
            operands[4] = GEN_INT (length + 4);
6113
          }
6114
 
6115
        /* Create a reversed conditional branch which branches around
6116
           the following insns.  */
6117
        if (GET_MODE (operands[1]) != DImode)
6118
          {
6119
            if (nullify)
6120
              {
6121
                if (negated)
6122
                  strcpy (buf,
6123
                    "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6124
                else
6125
                  strcpy (buf,
6126
                    "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6127
              }
6128
            else
6129
              {
6130
                if (negated)
6131
                  strcpy (buf,
6132
                    "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6133
                else
6134
                  strcpy (buf,
6135
                    "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6136
              }
6137
          }
6138
        else
6139
          {
6140
            if (nullify)
6141
              {
6142
                if (negated)
6143
                  strcpy (buf,
6144
                    "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6145
                else
6146
                  strcpy (buf,
6147
                    "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6148
              }
6149
            else
6150
              {
6151
                if (negated)
6152
                  strcpy (buf,
6153
                    "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6154
                else
6155
                  strcpy (buf,
6156
                    "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6157
              }
6158
          }
6159
 
6160
        output_asm_insn (buf, operands);
6161
        return output_lbranch (operands[0], insn, xdelay);
6162
    }
6163
  return buf;
6164
}
6165
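/* Illustrative only: for the common short (length 4) case above with
   nullification and no negation, the template assembled in BUF is

       {com%I2b,|cmp%I2b,}%S3,n %2,%r1,%0

   which, with hypothetical register operands, might render as
   "cmpb,=,n %r5,%r4,L$0042" on a PA 2.0 target.  The brace pairs
   select between PA 1.x and PA 2.0 mnemonics, and the %-escapes are
   expanded by the port's print_operand code.  */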
 
6166
/* This routine handles output of long unconditional branches that
6167
   exceed the maximum range of a simple branch instruction.  Since
6168
   we don't have a register available for the branch, we save register
6169
   %r1 in the frame marker, load the branch destination DEST into %r1,
6170
   execute the branch, and restore %r1 in the delay slot of the branch.
6171
 
6172
   Since long branches may have an insn in the delay slot and the
6173
   delay slot is used to restore %r1, we in general need to extract
6174
   this insn and execute it before the branch.  However, to facilitate
6175
   use of this function by conditional branches, we also provide an
6176
   option to not extract the delay insn so that it will be emitted
6177
   after the long branch.  So, if there is an insn in the delay slot,
6178
   it is extracted if XDELAY is nonzero.
6179
 
6180
   The lengths of the various long-branch sequences are 20, 16 and 24
6181
   bytes for the portable runtime, non-PIC and PIC cases, respectively.  */
6182
 
6183
const char *
6184
output_lbranch (rtx dest, rtx insn, int xdelay)
6185
{
6186
  rtx xoperands[2];
6187
 
6188
  xoperands[0] = dest;
6189
 
6190
  /* First, free up the delay slot.  */
6191
  if (xdelay && dbr_sequence_length () != 0)
6192
    {
6193
      /* We can't handle a jump in the delay slot.  */
6194
      gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN);
6195
 
6196
      final_scan_insn (NEXT_INSN (insn), asm_out_file,
6197
                       optimize, 0, NULL);
6198
 
6199
      /* Now delete the delay insn.  */
6200
      PUT_CODE (NEXT_INSN (insn), NOTE);
6201
      NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
6202
      NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
6203
    }
6204
 
6205
  /* Output an insn to save %r1.  The runtime documentation doesn't
6206
     specify whether the "Clean Up" slot in the callers frame can
6207
     be clobbered by the callee.  It isn't copied by HP's builtin
6208
     alloca, so this suggests that it can be clobbered if necessary.
6209
     The "Static Link" location is copied by HP builtin alloca, so
6210
     we avoid using it.  Using the cleanup slot might be a problem
6211
     if we have to interoperate with languages that pass cleanup
6212
     information.  However, it should be possible to handle these
6213
     situations with GCC's asm feature.
6214
 
6215
     The "Current RP" slot is reserved for the called procedure, so
6216
     we try to use it when we don't have a frame of our own.  It's
6217
     rather unlikely that we won't have a frame when we need to emit
6218
     a very long branch.
6219
 
6220
     Really the way to go long term is a register scavenger; go to
6221
     the target of the jump and find a register which we can use
6222
     as a scratch to hold the value in %r1.  Then, we wouldn't have
6223
     to free up the delay slot or clobber a slot that may be needed
6224
     for other purposes.  */
6225
  if (TARGET_64BIT)
6226
    {
6227
      if (actual_fsize == 0 && !regs_ever_live[2])
6228
        /* Use the return pointer slot in the frame marker.  */
6229
        output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6230
      else
6231
        /* Use the slot at -40 in the frame marker since HP builtin
6232
           alloca doesn't copy it.  */
6233
        output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6234
    }
6235
  else
6236
    {
6237
      if (actual_fsize == 0 && !regs_ever_live[2])
6238
        /* Use the return pointer slot in the frame marker.  */
6239
        output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6240
      else
6241
        /* Use the "Clean Up" slot in the frame marker.  In GCC,
6242
           the only other use of this location is for copying a
6243
           floating point double argument from a floating-point
6244
           register to two general registers.  The copy is done
6245
           as an "atomic" operation when outputting a call, so it
6246
           won't interfere with our using the location here.  */
6247
        output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6248
    }
6249
 
6250
  if (TARGET_PORTABLE_RUNTIME)
6251
    {
6252
      output_asm_insn ("ldil L'%0,%%r1", xoperands);
6253
      output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6254
      output_asm_insn ("bv %%r0(%%r1)", xoperands);
6255
    }
6256
  else if (flag_pic)
6257
    {
6258
      output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6259
      if (TARGET_SOM || !TARGET_GAS)
6260
        {
6261
          xoperands[1] = gen_label_rtx ();
6262
          output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
6263
          (*targetm.asm_out.internal_label) (asm_out_file, "L",
6264
                                             CODE_LABEL_NUMBER (xoperands[1]));
6265
          output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
6266
        }
6267
      else
6268
        {
6269
          output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6270
          output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6271
        }
6272
      output_asm_insn ("bv %%r0(%%r1)", xoperands);
6273
    }
6274
  else
6275
    /* Now output a very long branch to the original target.  */
6276
    output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6277
 
6278
  /* Now restore the value of %r1 in the delay slot.  */
6279
  if (TARGET_64BIT)
6280
    {
6281
      if (actual_fsize == 0 && !regs_ever_live[2])
6282
        return "ldd -16(%%r30),%%r1";
6283
      else
6284
        return "ldd -40(%%r30),%%r1";
6285
    }
6286
  else
6287
    {
6288
      if (actual_fsize == 0 && !regs_ever_live[2])
6289
        return "ldw -20(%%r30),%%r1";
6290
      else
6291
        return "ldw -12(%%r30),%%r1";
6292
    }
6293
}
6294
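/* Illustrative only: with a frame (so the "Clean Up" slot is used), the
   32-bit non-PIC path above emits the 16-byte sequence

       stw %r1,-12(%r30)        ; save %r1 in the frame marker
       ldil L'target,%r1        ; high part of the destination
       be R'target(%sr4,%r1)    ; inter-space branch
       ldw -12(%r30),%r1        ; delay slot: restore %r1

   matching the 20/16/24 byte counts given in the header comment; the
   PIC form replaces ldil/be with a pc-relative bl/addil/ldo/bv chain.
   "target" stands in for the real destination label.  */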
 
6295
/* This routine handles all the branch-on-bit conditional branch sequences we
6296
   might need to generate.  It handles nullification of delay slots,
6297
   varying length branches, negated branches and all combinations of the
6298
   above.  It returns the appropriate output template to emit the branch.  */
6299
 
6300
const char *
6301
output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6302
{
6303
  static char buf[100];
6304
  int useskip = 0;
6305
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
6306
  int length = get_attr_length (insn);
6307
  int xdelay;
6308
 
6309
  /* A conditional branch to the following instruction (e.g. the delay slot) is
6310
     asking for a disaster.  I do not think this can happen as this pattern
6311
     is only used when optimizing; jump optimization should eliminate the
6312
     jump.  But be prepared just in case.  */
6313
 
6314
  if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6315
    return "nop";
6316
 
6317
  /* If this is a long branch with its delay slot unfilled, set `nullify'
6318
     as it can nullify the delay slot and save a nop.  */
6319
  if (length == 8 && dbr_sequence_length () == 0)
6320
    nullify = 1;
6321
 
6322
  /* If this is a short forward conditional branch which did not get
6323
     its delay slot filled, the delay slot can still be nullified.  */
6324
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
6325
    nullify = forward_branch_p (insn);
6326
 
6327
  /* A forward branch over a single nullified insn can be done with an
6328
     extrs instruction.  This avoids a single cycle penalty due to a
6329
     mis-predicted branch if we fall through (branch not taken).  */
6330
 
6331
  if (length == 4
6332
      && next_real_insn (insn) != 0
6333
      && get_attr_length (next_real_insn (insn)) == 4
6334
      && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6335
      && nullify)
6336
    useskip = 1;
6337
 
6338
  switch (length)
6339
    {
6340
 
6341
      /* All short conditional branches except backwards with an unfilled
6342
         delay slot.  */
6343
      case 4:
6344
        if (useskip)
6345
          strcpy (buf, "{extrs,|extrw,s,}");
6346
        else
6347
          strcpy (buf, "bb,");
6348
        if (useskip && GET_MODE (operands[0]) == DImode)
6349
          strcpy (buf, "extrd,s,*");
6350
        else if (GET_MODE (operands[0]) == DImode)
6351
          strcpy (buf, "bb,*");
6352
        if ((which == 0 && negated)
6353
             || (which == 1 && ! negated))
6354
          strcat (buf, ">=");
6355
        else
6356
          strcat (buf, "<");
6357
        if (useskip)
6358
          strcat (buf, " %0,%1,1,%%r0");
6359
        else if (nullify && negated)
6360
          strcat (buf, ",n %0,%1,%3");
6361
        else if (nullify && ! negated)
6362
          strcat (buf, ",n %0,%1,%2");
6363
        else if (! nullify && negated)
6364
          strcat (buf, "%0,%1,%3");
6365
        else if (! nullify && ! negated)
6366
          strcat (buf, " %0,%1,%2");
6367
        break;
6368
 
6369
     /* All long conditionals.  Note a short backward branch with an
6370
        unfilled delay slot is treated just like a long backward branch
6371
        with an unfilled delay slot.  */
6372
      case 8:
6373
        /* Handle weird backwards branch with a filled delay slot
6374
           which is nullified.  */
6375
        if (dbr_sequence_length () != 0
6376
            && ! forward_branch_p (insn)
6377
            && nullify)
6378
          {
6379
            strcpy (buf, "bb,");
6380
            if (GET_MODE (operands[0]) == DImode)
6381
              strcat (buf, "*");
6382
            if ((which == 0 && negated)
6383
                || (which == 1 && ! negated))
6384
              strcat (buf, "<");
6385
            else
6386
              strcat (buf, ">=");
6387
            if (negated)
6388
              strcat (buf, ",n %0,%1,.+12\n\tb %3");
6389
            else
6390
              strcat (buf, ",n %0,%1,.+12\n\tb %2");
6391
          }
6392
        /* Handle short backwards branch with an unfilled delay slot.
6393
           Using a bb;nop rather than extrs;bl saves 1 cycle for both
6394
           taken and untaken branches.  */
6395
        else if (dbr_sequence_length () == 0
6396
                 && ! forward_branch_p (insn)
6397
                 && INSN_ADDRESSES_SET_P ()
6398
                 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6399
                                    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6400
          {
6401
            strcpy (buf, "bb,");
6402
            if (GET_MODE (operands[0]) == DImode)
6403
              strcat (buf, "*");
6404
            if ((which == 0 && negated)
6405
                || (which == 1 && ! negated))
6406
              strcat (buf, ">=");
6407
            else
6408
              strcat (buf, "<");
6409
            if (negated)
6410
              strcat (buf, " %0,%1,%3%#");
6411
            else
6412
              strcat (buf, " %0,%1,%2%#");
6413
          }
6414
        else
6415
          {
6416
            if (GET_MODE (operands[0]) == DImode)
6417
              strcpy (buf, "extrd,s,*");
6418
            else
6419
              strcpy (buf, "{extrs,|extrw,s,}");
6420
            if ((which == 0 && negated)
6421
                || (which == 1 && ! negated))
6422
              strcat (buf, "<");
6423
            else
6424
              strcat (buf, ">=");
6425
            if (nullify && negated)
6426
              strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
6427
            else if (nullify && ! negated)
6428
              strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
6429
            else if (negated)
6430
              strcat (buf, " %0,%1,1,%%r0\n\tb %3");
6431
            else
6432
              strcat (buf, " %0,%1,1,%%r0\n\tb %2");
6433
          }
6434
        break;
6435
 
6436
      default:
6437
        /* The reversed conditional branch must branch over one additional
6438
           instruction if the delay slot is filled and needs to be extracted
6439
           by output_lbranch.  If the delay slot is empty or this is a
6440
           nullified forward branch, the instruction after the reversed
6441
           conditional branch must be nullified.  */
6442
        if (dbr_sequence_length () == 0
6443
            || (nullify && forward_branch_p (insn)))
6444
          {
6445
            nullify = 1;
6446
            xdelay = 0;
6447
            operands[4] = GEN_INT (length);
6448
          }
6449
        else
6450
          {
6451
            xdelay = 1;
6452
            operands[4] = GEN_INT (length + 4);
6453
          }
6454
 
6455
        if (GET_MODE (operands[0]) == DImode)
6456
          strcpy (buf, "bb,*");
6457
        else
6458
          strcpy (buf, "bb,");
6459
        if ((which == 0 && negated)
6460
            || (which == 1 && !negated))
6461
          strcat (buf, "<");
6462
        else
6463
          strcat (buf, ">=");
6464
        if (nullify)
6465
          strcat (buf, ",n %0,%1,.+%4");
6466
        else
6467
          strcat (buf, " %0,%1,.+%4");
6468
        output_asm_insn (buf, operands);
6469
        return output_lbranch (negated ? operands[3] : operands[2],
6470
                               insn, xdelay);
6471
    }
6472
  return buf;
6473
}
6474
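/* Illustrative only: in the templates above, the "<" completer branches
   when the selected bit is 1 and ">=" when it is 0 -- the same sign
   test the extrs/extrw,s skip form performs by extracting the bit with
   sign extension.  */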
 
6475
/* This routine handles all the branch-on-variable-bit conditional branch
6476
   sequences we might need to generate.  It handles nullification of delay
6477
   slots, varying length branches, negated branches and all combinations
6478
   of the above.  It returns the appropriate output template to emit the
6479
   branch.  */
6480
 
6481
const char *
6482
output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6483
{
6484
  static char buf[100];
6485
  int useskip = 0;
6486
  int nullify = INSN_ANNULLED_BRANCH_P (insn);
6487
  int length = get_attr_length (insn);
6488
  int xdelay;
6489
 
6490
  /* A conditional branch to the following instruction (e.g. the delay slot) is
6491
     asking for a disaster.  I do not think this can happen as this pattern
6492
     is only used when optimizing; jump optimization should eliminate the
6493
     jump.  But be prepared just in case.  */
6494
 
6495
  if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6496
    return "nop";
6497
 
6498
  /* If this is a long branch with its delay slot unfilled, set `nullify'
6499
     as it can nullify the delay slot and save a nop.  */
6500
  if (length == 8 && dbr_sequence_length () == 0)
6501
    nullify = 1;
6502
 
6503
  /* If this is a short forward conditional branch which did not get
6504
     its delay slot filled, the delay slot can still be nullified.  */
6505
  if (! nullify && length == 4 && dbr_sequence_length () == 0)
6506
    nullify = forward_branch_p (insn);
6507
 
6508
  /* A forward branch over a single nullified insn can be done with an
6509
     extrs instruction.  This avoids a single cycle penalty due to a
6510
     mis-predicted branch if we fall through (branch not taken).  */
6511
 
6512
  if (length == 4
6513
      && next_real_insn (insn) != 0
6514
      && get_attr_length (next_real_insn (insn)) == 4
6515
      && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn))
6516
      && nullify)
6517
    useskip = 1;
6518
 
6519
  switch (length)
6520
    {
6521
 
6522
      /* All short conditional branches except backwards with an unfilled
6523
         delay slot.  */
6524
      case 4:
6525
        if (useskip)
6526
          strcpy (buf, "{vextrs,|extrw,s,}");
6527
        else
6528
          strcpy (buf, "{bvb,|bb,}");
6529
        if (useskip && GET_MODE (operands[0]) == DImode)
6530
          strcpy (buf, "extrd,s,*");
6531
        else if (GET_MODE (operands[0]) == DImode)
6532
          strcpy (buf, "bb,*");
6533
        if ((which == 0 && negated)
6534
             || (which == 1 && ! negated))
6535
          strcat (buf, ">=");
6536
        else
6537
          strcat (buf, "<");
6538
        if (useskip)
6539
          strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6540
        else if (nullify && negated)
6541
          strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6542
        else if (nullify && ! negated)
6543
          strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6544
        else if (! nullify && negated)
6545
          strcat (buf, "{%0,%3|%0,%%sar,%3}");
6546
        else if (! nullify && ! negated)
6547
          strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6548
        break;
6549
 
6550
     /* All long conditionals.  Note a short backward branch with an
6551
        unfilled delay slot is treated just like a long backward branch
6552
        with an unfilled delay slot.  */
6553
      case 8:
6554
        /* Handle weird backwards branch with a filled delay slot
6555
           which is nullified.  */
6556
        if (dbr_sequence_length () != 0
6557
            && ! forward_branch_p (insn)
6558
            && nullify)
6559
          {
6560
            strcpy (buf, "{bvb,|bb,}");
6561
            if (GET_MODE (operands[0]) == DImode)
6562
              strcat (buf, "*");
6563
            if ((which == 0 && negated)
6564
                || (which == 1 && ! negated))
6565
              strcat (buf, "<");
6566
            else
6567
              strcat (buf, ">=");
6568
            if (negated)
6569
              strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6570
            else
6571
              strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6572
          }
6573
        /* Handle short backwards branch with an unfilled delay slot.
6574
           Using a bb;nop rather than extrs;bl saves 1 cycle for both
6575
           taken and untaken branches.  */
6576
        else if (dbr_sequence_length () == 0
6577
                 && ! forward_branch_p (insn)
6578
                 && INSN_ADDRESSES_SET_P ()
6579
                 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6580
                                    - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6581
          {
6582
            strcpy (buf, "{bvb,|bb,}");
6583
            if (GET_MODE (operands[0]) == DImode)
6584
              strcat (buf, "*");
6585
            if ((which == 0 && negated)
6586
                || (which == 1 && ! negated))
6587
              strcat (buf, ">=");
6588
            else
6589
              strcat (buf, "<");
6590
            if (negated)
6591
              strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6592
            else
6593
              strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6594
          }
6595
        else
6596
          {
6597
            strcpy (buf, "{vextrs,|extrw,s,}");
6598
            if (GET_MODE (operands[0]) == DImode)
6599
              strcpy (buf, "extrd,s,*");
6600
            if ((which == 0 && negated)
6601
                || (which == 1 && ! negated))
6602
              strcat (buf, "<");
6603
            else
6604
              strcat (buf, ">=");
6605
            if (nullify && negated)
6606
              strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6607
            else if (nullify && ! negated)
6608
              strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6609
            else if (negated)
6610
              strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6611
            else
6612
              strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6613
          }
6614
        break;
6615
 
6616
      default:
6617
        /* The reversed conditional branch must branch over one additional
6618
           instruction if the delay slot is filled and needs to be extracted
6619
           by output_lbranch.  If the delay slot is empty or this is a
6620
           nullified forward branch, the instruction after the reversed
6621
           conditional branch must be nullified.  */
6622
        if (dbr_sequence_length () == 0
6623
            || (nullify && forward_branch_p (insn)))
6624
          {
6625
            nullify = 1;
6626
            xdelay = 0;
6627
            operands[4] = GEN_INT (length);
6628
          }
6629
        else
6630
          {
6631
            xdelay = 1;
6632
            operands[4] = GEN_INT (length + 4);
6633
          }
6634
 
6635
        if (GET_MODE (operands[0]) == DImode)
6636
          strcpy (buf, "bb,*");
6637
        else
6638
          strcpy (buf, "{bvb,|bb,}");
6639
        if ((which == 0 && negated)
6640
            || (which == 1 && !negated))
6641
          strcat (buf, "<");
6642
        else
6643
          strcat (buf, ">=");
6644
        if (nullify)
6645
          strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
6646
        else
6647
          strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
6648
        output_asm_insn (buf, operands);
6649
        return output_lbranch (negated ? operands[3] : operands[2],
6650
                               insn, xdelay);
6651
    }
6652
  return buf;
6653
}
6654
 
6655
/* Return the output template for emitting a dbra type insn.
6656
 
6657
   Note it may perform some output operations on its own before
6658
   returning the final output string.  */
6659
const char *
6660
output_dbra (rtx *operands, rtx insn, int which_alternative)
6661
{
6662
  int length = get_attr_length (insn);
6663
 
6664
  /* A conditional branch to the following instruction (e.g. the delay slot) is
6665
     asking for a disaster.  Be prepared!  */
6666
 
6667
  if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6668
    {
6669
      if (which_alternative == 0)
6670
        return "ldo %1(%0),%0";
6671
      else if (which_alternative == 1)
6672
        {
6673
          output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
6674
          output_asm_insn ("ldw -16(%%r30),%4", operands);
6675
          output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6676
          return "{fldws|fldw} -16(%%r30),%0";
6677
        }
6678
      else
6679
        {
6680
          output_asm_insn ("ldw %0,%4", operands);
6681
          return "ldo %1(%4),%4\n\tstw %4,%0";
6682
        }
6683
    }
6684
 
6685
  if (which_alternative == 0)
6686
    {
6687
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
6688
      int xdelay;
6689
 
6690
      /* If this is a long branch with its delay slot unfilled, set `nullify'
6691
         as it can nullify the delay slot and save a nop.  */
6692
      if (length == 8 && dbr_sequence_length () == 0)
6693
        nullify = 1;
6694
 
6695
      /* If this is a short forward conditional branch which did not get
6696
         its delay slot filled, the delay slot can still be nullified.  */
6697
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
6698
        nullify = forward_branch_p (insn);
6699
 
6700
      switch (length)
6701
        {
6702
        case 4:
6703
          if (nullify)
6704
            return "addib,%C2,n %1,%0,%3";
6705
          else
6706
            return "addib,%C2 %1,%0,%3";
6707
 
6708
        case 8:
6709
           /* Handle weird backwards branch with a filled delay slot
6710
             which is nullified.  */
6711
          if (dbr_sequence_length () != 0
6712
              && ! forward_branch_p (insn)
6713
              && nullify)
6714
            return "addib,%N2,n %1,%0,.+12\n\tb %3";
6715
          /* Handle short backwards branch with an unfilled delay slot.
6716
             Using a addb;nop rather than addi;bl saves 1 cycle for both
6717
             taken and untaken branches.  */
6718
          else if (dbr_sequence_length () == 0
6719
                   && ! forward_branch_p (insn)
6720
                   && INSN_ADDRESSES_SET_P ()
6721
                   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6722
                                      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6723
              return "addib,%C2 %1,%0,%3%#";
6724
 
6725
          /* Handle normal cases.  */
6726
          if (nullify)
6727
            return "addi,%N2 %1,%0,%0\n\tb,n %3";
6728
          else
6729
            return "addi,%N2 %1,%0,%0\n\tb %3";
6730
 
6731
        default:
6732
          /* The reversed conditional branch must branch over one additional
6733
             instruction if the delay slot is filled and needs to be extracted
6734
             by output_lbranch.  If the delay slot is empty or this is a
6735
             nullified forward branch, the instruction after the reversed
6736
             conditional branch must be nullified.  */
6737
          if (dbr_sequence_length () == 0
6738
              || (nullify && forward_branch_p (insn)))
6739
            {
6740
              nullify = 1;
6741
              xdelay = 0;
6742
              operands[4] = GEN_INT (length);
6743
            }
6744
          else
6745
            {
6746
              xdelay = 1;
6747
              operands[4] = GEN_INT (length + 4);
6748
            }
6749
 
6750
          if (nullify)
6751
            output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
6752
          else
6753
            output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
6754
 
6755
          return output_lbranch (operands[3], insn, xdelay);
6756
        }
6757
 
6758
    }
6759
  /* Deal with gross reload from FP register case.  */
6760
  else if (which_alternative == 1)
6761
    {
6762
      /* Move loop counter from FP register to MEM then into a GR,
6763
         increment the GR, store the GR into MEM, and finally reload
6764
         the FP register from MEM from within the branch's delay slot.  */
6765
      output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
6766
                       operands);
6767
      output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
6768
      if (length == 24)
6769
        return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
6770
      else if (length == 28)
6771
        return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6772
      else
6773
        {
6774
          operands[5] = GEN_INT (length - 16);
6775
          output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
6776
          output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
6777
          return output_lbranch (operands[3], insn, 0);
6778
        }
6779
    }
6780
  /* Deal with gross reload from memory case.  */
6781
  else
6782
    {
6783
      /* Reload loop counter from memory, the store back to memory
6784
         happens in the branch's delay slot.  */
6785
      output_asm_insn ("ldw %0,%4", operands);
6786
      if (length == 12)
6787
        return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
6788
      else if (length == 16)
6789
        return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
6790
      else
6791
        {
6792
          operands[5] = GEN_INT (length - 4);
6793
          output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
6794
          return output_lbranch (operands[3], insn, 0);
6795
        }
6796
    }
6797
}
6798
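/* A minimal sketch, not part of this file: the loop shape behind the
   "addib,%C2 %1,%0,%3" template above -- add an immediate to the
   counter and branch on the resulting condition in a single insn.  The
   function is hypothetical.  */
#if 0
static int
count_down (int n)
{
  int iters = 0;
  do
    iters++;          /* loop body */
  while (--n > 0);    /* one addib: n += -1, branch back while > 0 */
  return iters;
}
#endif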
 
6799
/* Return the output template for emitting a movb type insn.
6800
 
6801
   Note it may perform some output operations on its own before
6802
   returning the final output string.  */
6803
const char *
6804
output_movb (rtx *operands, rtx insn, int which_alternative,
6805
             int reverse_comparison)
6806
{
6807
  int length = get_attr_length (insn);
6808
 
6809
  /* A conditional branch to the following instruction (e.g. the delay slot) is
6810
     asking for a disaster.  Be prepared!  */
6811
 
6812
  if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
6813
    {
6814
      if (which_alternative == 0)
6815
        return "copy %1,%0";
6816
      else if (which_alternative == 1)
6817
        {
6818
          output_asm_insn ("stw %1,-16(%%r30)", operands);
6819
          return "{fldws|fldw} -16(%%r30),%0";
6820
        }
6821
      else if (which_alternative == 2)
6822
        return "stw %1,%0";
6823
      else
6824
        return "mtsar %r1";
6825
    }
6826
 
6827
  /* Support the second variant.  */
6828
  if (reverse_comparison)
6829
    PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
6830
 
6831
  if (which_alternative == 0)
6832
    {
6833
      int nullify = INSN_ANNULLED_BRANCH_P (insn);
6834
      int xdelay;
6835
 
6836
      /* If this is a long branch with its delay slot unfilled, set `nullify'
6837
         as it can nullify the delay slot and save a nop.  */
6838
      if (length == 8 && dbr_sequence_length () == 0)
6839
        nullify = 1;
6840
 
6841
      /* If this is a short forward conditional branch which did not get
6842
         its delay slot filled, the delay slot can still be nullified.  */
6843
      if (! nullify && length == 4 && dbr_sequence_length () == 0)
6844
        nullify = forward_branch_p (insn);
6845
 
6846
      switch (length)
6847
        {
6848
        case 4:
6849
          if (nullify)
6850
            return "movb,%C2,n %1,%0,%3";
6851
          else
6852
            return "movb,%C2 %1,%0,%3";
6853
 
6854
        case 8:
6855
          /* Handle weird backwards branch with a filled delay slot
6856
             which is nullified.  */
6857
          if (dbr_sequence_length () != 0
6858
              && ! forward_branch_p (insn)
6859
              && nullify)
6860
            return "movb,%N2,n %1,%0,.+12\n\tb %3";
6861
 
6862
          /* Handle short backwards branch with an unfilled delay slot.
6863
             Using a movb;nop rather than or;bl saves 1 cycle for both
6864
             taken and untaken branches.  */
6865
          else if (dbr_sequence_length () == 0
6866
                   && ! forward_branch_p (insn)
6867
                   && INSN_ADDRESSES_SET_P ()
6868
                   && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6869
                                      - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6870
            return "movb,%C2 %1,%0,%3%#";
6871
          /* Handle normal cases.  */
6872
          if (nullify)
6873
            return "or,%N2 %1,%%r0,%0\n\tb,n %3";
6874
          else
6875
            return "or,%N2 %1,%%r0,%0\n\tb %3";
6876
 
6877
        default:
6878
          /* The reversed conditional branch must branch over one additional
6879
             instruction if the delay slot is filled and needs to be extracted
6880
             by output_lbranch.  If the delay slot is empty or this is a
6881
             nullified forward branch, the instruction after the reversed
6882
             conditional branch must be nullified.  */
6883
          if (dbr_sequence_length () == 0
6884
              || (nullify && forward_branch_p (insn)))
6885
            {
6886
              nullify = 1;
6887
              xdelay = 0;
6888
              operands[4] = GEN_INT (length);
6889
            }
6890
          else
6891
            {
6892
              xdelay = 1;
6893
              operands[4] = GEN_INT (length + 4);
6894
            }
6895
 
6896
          if (nullify)
6897
            output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
6898
          else
6899
            output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
6900
 
6901
          return output_lbranch (operands[3], insn, xdelay);
6902
        }
6903
    }
6904
  /* Deal with gross reload for FP destination register case.  */
6905
  else if (which_alternative == 1)
6906
    {
6907
      /* Move source register to MEM, perform the branch test, then
6908
         finally load the FP register from MEM from within the branch's
6909
         delay slot.  */
6910
      output_asm_insn ("stw %1,-16(%%r30)", operands);
6911
      if (length == 12)
6912
        return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
6913
      else if (length == 16)
6914
        return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
6915
      else
6916
        {
6917
          operands[4] = GEN_INT (length - 4);
6918
          output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
6919
          output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
6920
          return output_lbranch (operands[3], insn, 0);
6921
        }
6922
    }
6923
  /* Deal with gross reload from memory case.  */
6924
  else if (which_alternative == 2)
6925
    {
6926
      /* Reload loop counter from memory, the store back to memory
6927
         happens in the branch's delay slot.  */
6928
      if (length == 8)
6929
        return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
6930
      else if (length == 12)
6931
        return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
6932
      else
6933
        {
6934
          operands[4] = GEN_INT (length);
6935
          output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
6936
                           operands);
6937
          return output_lbranch (operands[3], insn, 0);
6938
        }
6939
    }
6940
  /* Handle SAR as a destination.  */
6941
  else
6942
    {
6943
      if (length == 8)
6944
        return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
6945
      else if (length == 12)
6946
        return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
6947
      else
6948
        {
6949
          operands[4] = GEN_INT (length);
6950
          output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
6951
                           operands);
6952
          return output_lbranch (operands[3], insn, 0);
6953
        }
6954
    }
6955
}
6956
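/* A minimal sketch, not part of this file: the shape behind the
   "movb,%C2 %1,%0,%3" template above -- copy a register and branch on
   the copied value in a single insn.  Names are hypothetical.  */
#if 0
static int
chain_length (const int *next, int head)
{
  int n = 0;
  int x;

  /* Each iteration is one movb: copy HEAD into X and branch while the
     copied value is nonzero.  */
  while ((x = head) != 0)
    {
      head = next[x];
      n++;
    }
  return n;
}
#endif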
 
6957
/* Copy any FP arguments in INSN into integer registers.  */
6958
static void
6959
copy_fp_args (rtx insn)
6960
{
6961
  rtx link;
6962
  rtx xoperands[2];
6963
 
6964
  for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6965
    {
6966
      int arg_mode, regno;
6967
      rtx use = XEXP (link, 0);
6968
 
6969
      if (! (GET_CODE (use) == USE
6970
          && GET_CODE (XEXP (use, 0)) == REG
6971
          && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
6972
        continue;
6973
 
6974
      arg_mode = GET_MODE (XEXP (use, 0));
6975
      regno = REGNO (XEXP (use, 0));
6976
 
6977
      /* Is it a floating point register?  */
6978
      if (regno >= 32 && regno <= 39)
6979
        {
6980
          /* Copy the FP register into an integer register via memory.  */
6981
          if (arg_mode == SFmode)
6982
            {
6983
              xoperands[0] = XEXP (use, 0);
6984
              xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
6985
              output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
6986
              output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6987
            }
6988
          else
6989
            {
6990
              xoperands[0] = XEXP (use, 0);
6991
              xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
6992
              output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
6993
              output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
6994
              output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
6995
            }
6996
        }
6997
    }
6998
}
6999
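/* Illustrative only: for a DFmode argument the loop above emits a
   three-insn copy through the frame marker, along the lines of

       fstd %frN,-16(%sr0,%r30)     ; spill the FP register
       ldw -12(%sr0,%r30),%rLOW     ; low word into the second GR
       ldw -16(%sr0,%r30),%rHIGH    ; high word into the first GR

   with the GR pair chosen by the argument's position; the register
   names here are placeholders.  */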
 
7000
/* Compute length of the FP argument copy sequence for INSN.  */
7001
static int
7002
length_fp_args (rtx insn)
7003
{
7004
  int length = 0;
7005
  rtx link;
7006
 
7007
  for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7008
    {
7009
      int arg_mode, regno;
7010
      rtx use = XEXP (link, 0);
7011
 
7012
      if (! (GET_CODE (use) == USE
7013
          && GET_CODE (XEXP (use, 0)) == REG
7014
          && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7015
        continue;
7016
 
7017
      arg_mode = GET_MODE (XEXP (use, 0));
7018
      regno = REGNO (XEXP (use, 0));
7019
 
7020
      /* Is it a floating point register?  */
7021
      if (regno >= 32 && regno <= 39)
7022
        {
7023
          if (arg_mode == SFmode)
7024
            length += 8;
7025
          else
7026
            length += 12;
7027
        }
7028
    }
7029
 
7030
  return length;
7031
}
7032
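/* Illustrative check: the 8 and 12 byte contributions above correspond
   to the copy_fp_args sequences -- two 4-byte insns (fstw + ldw) for an
   SFmode argument, three (fstd + two ldw) for a DFmode one.  */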
 
7033
/* Return the attribute length for the millicode call instruction INSN.
7034
   The length must match the code generated by output_millicode_call.
7035
   We include the delay slot in the returned length as it is better to
7036
   overestimate the length than to underestimate it.  */
7037
 
7038
int
7039
attr_length_millicode_call (rtx insn)
7040
{
7041
  unsigned long distance = -1;
7042
  unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7043
 
7044
  if (INSN_ADDRESSES_SET_P ())
7045
    {
7046
      distance = (total + insn_current_reference_address (insn));
7047
      if (distance < total)
7048
        distance = -1;
7049
    }
7050
 
7051
  if (TARGET_64BIT)
7052
    {
7053
      if (!TARGET_LONG_CALLS && distance < 7600000)
7054
        return 8;
7055
 
7056
      return 20;
7057
    }
7058
  else if (TARGET_PORTABLE_RUNTIME)
7059
    return 24;
7060
  else
7061
    {
7062
      if (!TARGET_LONG_CALLS && distance < 240000)
7063
        return 8;
7064
 
7065
      if (TARGET_LONG_ABS_CALL && !flag_pic)
7066
        return 12;
7067
 
7068
      return 24;
7069
    }
7070
}
7071
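/* Illustrative only: the lengths above count whole 4-byte insns, delay
   slot included -- the 8-byte fast path is a single {bl|b,l} plus its
   delay slot, while the 24-byte portable-runtime figure covers the
   five-insn ldil/ldo/bl/addi/bv sequence emitted by
   output_millicode_call plus the trailing slot.  */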
 
7072
/* INSN is a function call.  It may have an unconditional jump
7073
   in its delay slot.
7074
 
7075
   CALL_DEST is the routine we are calling.  */
7076
 
7077
const char *
7078
output_millicode_call (rtx insn, rtx call_dest)
7079
{
7080
  int attr_length = get_attr_length (insn);
7081
  int seq_length = dbr_sequence_length ();
7082
  int distance;
7083
  rtx seq_insn;
7084
  rtx xoperands[3];
7085
 
7086
  xoperands[0] = call_dest;
7087
  xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7088
 
7089
  /* Handle the common case where we are sure that the branch will
7090
     reach the beginning of the $CODE$ subspace.  The within-reach
7091
     form of the $$sh_func_adrs call has a length of 28.  Because
7092
     it has an attribute type of multi, it never has a nonzero
7093
     sequence length.  The length of the $$sh_func_adrs is the same
7094
     as certain out of reach PIC calls to other routines.  */
7095
  if (!TARGET_LONG_CALLS
7096
      && ((seq_length == 0
7097
           && (attr_length == 12
7098
               || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
7099
          || (seq_length != 0 && attr_length == 8)))
7100
    {
7101
      output_asm_insn ("{bl|b,l} %0,%2", xoperands);
7102
    }
7103
  else
7104
    {
7105
      if (TARGET_64BIT)
7106
        {
7107
          /* It might seem that one insn could be saved by accessing
7108
             the millicode function using the linkage table.  However,
7109
             this doesn't work in shared libraries and other dynamically
7110
             loaded objects.  Using a pc-relative sequence also avoids
7111
             problems related to the implicit use of the gp register.  */
7112
          output_asm_insn ("b,l .+8,%%r1", xoperands);
7113
 
7114
          if (TARGET_GAS)
7115
            {
7116
              output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7117
              output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7118
            }
7119
          else
7120
            {
7121
              xoperands[1] = gen_label_rtx ();
7122
              output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7123
              (*targetm.asm_out.internal_label) (asm_out_file, "L",
7124
                                         CODE_LABEL_NUMBER (xoperands[1]));
7125
              output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7126
            }
7127
 
7128
          output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7129
        }
7130
      else if (TARGET_PORTABLE_RUNTIME)
7131
        {
7132
          /* Pure portable runtime doesn't allow be/ble; we also don't
7133
             have PIC support in the assembler/linker, so this sequence
7134
             is needed.  */
7135
 
7136
          /* Get the address of our target into %r1.  */
7137
          output_asm_insn ("ldil L'%0,%%r1", xoperands);
7138
          output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7139
 
7140
          /* Get our return address into %r31.  */
7141
          output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7142
          output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7143
 
7144
          /* Jump to our target address in %r1.  */
7145
          output_asm_insn ("bv %%r0(%%r1)", xoperands);
7146
        }
7147
      else if (!flag_pic)
7148
        {
7149
          output_asm_insn ("ldil L'%0,%%r1", xoperands);
7150
          if (TARGET_PA_20)
7151
            output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7152
          else
7153
            output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7154
        }
7155
      else
7156
        {
7157
          output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7158
          output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7159
 
7160
          if (TARGET_SOM || !TARGET_GAS)
7161
            {
7162
              /* The HP assembler can generate relocations for the
7163
                 difference of two symbols.  GAS can do this for a
7164
                 millicode symbol but not an arbitrary external
7165
                 symbol when generating SOM output.  */
7166
              xoperands[1] = gen_label_rtx ();
7167
              (*targetm.asm_out.internal_label) (asm_out_file, "L",
7168
                                         CODE_LABEL_NUMBER (xoperands[1]));
7169
              output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7170
              output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7171
            }
7172
          else
7173
            {
7174
              output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7175
              output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7176
                               xoperands);
7177
            }
7178
 
7179
          /* Jump to our target address in %r1.  */
7180
          output_asm_insn ("bv %%r0(%%r1)", xoperands);
7181
        }
7182
    }
7183
 
7184
  if (seq_length == 0)
7185
    output_asm_insn ("nop", xoperands);
7186
 
7187
  /* We are done if there isn't a jump in the delay slot.  */
7188
  if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7189
    return "";
7190
 
7191
  /* This call has an unconditional jump in its delay slot.  */
7192
  xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
7193
 
7194
  /* See if the return address can be adjusted.  Use the containing
7195
     sequence insn's address.  */
7196
  if (INSN_ADDRESSES_SET_P ())
7197
    {
7198
      seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7199
      distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7200
                  - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7201
 
7202
      if (VAL_14_BITS_P (distance))
7203
        {
7204
          xoperands[1] = gen_label_rtx ();
7205
          output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
7206
          (*targetm.asm_out.internal_label) (asm_out_file, "L",
7207
                                             CODE_LABEL_NUMBER (xoperands[1]));
7208
        }
7209
      else
7210
        /* ??? This branch may not reach its target.  */
7211
        output_asm_insn ("nop\n\tb,n %0", xoperands);
7212
    }
7213
  else
7214
    /* ??? This branch may not reach its target.  */
7215
    output_asm_insn ("nop\n\tb,n %0", xoperands);
7216
 
7217
  /* Delete the jump.  */
7218
  PUT_CODE (NEXT_INSN (insn), NOTE);
7219
  NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7220
  NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7221
 
7222
  return "";
7223
}
7224
 
7225
/* Return the attribute length of the call instruction INSN.  The SIBCALL
7226
   flag indicates whether INSN is a regular call or a sibling call.  The
7227
   length returned must be longer than the code actually generated by
7228
   output_call.  Since branch shortening is done before delay branch
7229
   sequencing, there is no way to determine whether or not the delay
7230
   slot will be filled during branch shortening.  Even when the delay
7231
   slot is filled, we may have to add a nop if the delay slot contains
7232
   a branch that can't reach its target.  Thus, we always have to include
7233
   the delay slot in the length estimate.  This used to be done in
7234
   pa_adjust_insn_length but we do it here now as some sequences always
7235
   fill the delay slot and we can save four bytes in the estimate for
7236
   these sequences.  */
7237
 
7238
int
7239
attr_length_call (rtx insn, int sibcall)
7240
{
7241
  int local_call;
7242
  rtx call_dest;
7243
  tree call_decl;
7244
  int length = 0;
7245
  rtx pat = PATTERN (insn);
7246
  unsigned long distance = -1;
7247
 
7248
  if (INSN_ADDRESSES_SET_P ())
7249
    {
7250
      unsigned long total;
7251
 
7252
      total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7253
      distance = (total + insn_current_reference_address (insn));
7254
      if (distance < total)
7255
        distance = -1;
7256
    }
7257
 
7258
  /* Determine if this is a local call.  */
7259
  if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL)
7260
    call_dest = XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0);
7261
  else
7262
    call_dest = XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0);
7263
 
7264
  call_decl = SYMBOL_REF_DECL (call_dest);
7265
  local_call = call_decl && (*targetm.binds_local_p) (call_decl);
7266
 
7267
  /* pc-relative branch.  */
7268
  if (!TARGET_LONG_CALLS
7269
      && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7270
          || distance < 240000))
7271
    length += 8;
7272
 
7273
  /* 64-bit plabel sequence.  */
7274
  else if (TARGET_64BIT && !local_call)
7275
    length += sibcall ? 28 : 24;
7276
 
7277
  /* non-pic long absolute branch sequence.  */
7278
  else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7279
    length += 12;
7280
 
7281
  /* long pc-relative branch sequence.  */
7282
  else if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7283
           || (TARGET_64BIT && !TARGET_GAS)
7284
           || (TARGET_GAS && !TARGET_SOM
7285
               && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
7286
    {
7287
      length += 20;
7288
 
7289
      if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS)
7290
        length += 8;
7291
    }
7292
 
7293
  /* 32-bit plabel sequence.  */
7294
  else
7295
    {
7296
      length += 32;
7297
 
7298
      if (TARGET_SOM)
7299
        length += length_fp_args (insn);
7300
 
7301
      if (flag_pic)
7302
        length += 4;
7303
 
7304
      if (!TARGET_PA_20)
7305
        {
7306
          if (!sibcall)
7307
            length += 8;
7308
 
7309
          if (!TARGET_NO_SPACE_REGS)
7310
            length += 8;
7311
        }
7312
    }
7313
 
7314
  return length;
7315
}
7316
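/* Worked example, illustrative only: a 32-bit PIC sibcall through a
   plabel on a PA 1.x target with space registers enabled accumulates
   32 (base sequence) + 4 (PIC) + 8 (space-register handling) = 44
   bytes, plus the FP argument copy length on SOM targets.  */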
 
7317
/* INSN is a function call.  It may have an unconditional jump
7318
   in its delay slot.
7319
 
7320
   CALL_DEST is the routine we are calling.  */
7321
 
7322
const char *
7323
output_call (rtx insn, rtx call_dest, int sibcall)
7324
{
7325
  int delay_insn_deleted = 0;
7326
  int delay_slot_filled = 0;
7327
  int seq_length = dbr_sequence_length ();
7328
  tree call_decl = SYMBOL_REF_DECL (call_dest);
7329
  int local_call = call_decl && (*targetm.binds_local_p) (call_decl);
7330
  rtx xoperands[2];
7331
 
7332
  xoperands[0] = call_dest;
7333
 
7334
  /* Handle the common case where we're sure that the branch will reach
7335
     the beginning of the "$CODE$" subspace.  This is the beginning of
7336
     the current function if we are in a named section.  */
7337
  if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
7338
    {
7339
      xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7340
      output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7341
    }
7342
  else
7343
    {
7344
      if (TARGET_64BIT && !local_call)
7345
        {
7346
          /* ??? As far as I can tell, the HP linker doesn't support the
7347
             long pc-relative sequence described in the 64-bit runtime
7348
             architecture.  So, we use a slightly longer indirect call.  */
7349
          xoperands[0] = get_deferred_plabel (call_dest);
7350
          xoperands[1] = gen_label_rtx ();
7351
 
7352
          /* If this isn't a sibcall, we put the load of %r27 into the
7353
             delay slot.  We can't do this in a sibcall as we don't
7354
             have a second call-clobbered scratch register available.  */
7355
          if (seq_length != 0
7356
              && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7357
              && !sibcall)
7358
            {
7359
              final_scan_insn (NEXT_INSN (insn), asm_out_file,
7360
                               optimize, 0, NULL);
7361
 
7362
              /* Now delete the delay insn.  */
7363
              PUT_CODE (NEXT_INSN (insn), NOTE);
7364
              NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
7365
              NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
7366
              delay_insn_deleted = 1;
7367
            }
7368
 
7369
          output_asm_insn ("addil LT'%0,%%r27", xoperands);
7370
          output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7371
          output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7372
 
7373
          if (sibcall)
7374
            {
7375
              output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7376
              output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7377
              output_asm_insn ("bve (%%r1)", xoperands);
7378
            }
7379
          else
7380
            {
7381
              output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7382
              output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7383
              output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7384
              delay_slot_filled = 1;
7385
            }
7386
        }
7387
      else
7388
        {
7389
          int indirect_call = 0;
7390
 
7391
          /* Emit a long call.  There are several different sequences
7392
             of increasing length and complexity.  In most cases,
7393
             they don't allow an instruction in the delay slot.  */
7394
          if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7395
              && !(TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
7396
              && !(TARGET_GAS && !TARGET_SOM
7397
                   && (TARGET_LONG_PIC_PCREL_CALL || local_call))
7398
              && !TARGET_64BIT)
7399
            indirect_call = 1;
7400
 
7401
          if (seq_length != 0
7402
              && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7403
              && !sibcall
7404
              && (!TARGET_PA_20 || indirect_call))
7405
            {
              /* A non-jump insn in the delay slot.  By definition we can
                 emit this insn before the call (and in fact before argument
                 relocating).  */
              final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
                               NULL);

              /* Now delete the delay insn.  */
              PUT_CODE (NEXT_INSN (insn), NOTE);
              NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
              NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
              delay_insn_deleted = 1;
            }

          if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
            {
              /* This is the best sequence for making long calls in
                 non-pic code.  Unfortunately, GNU ld doesn't provide
                 the stub needed for external calls, and GAS's support
                 for this with the SOM linker is buggy.  It is safe
                 to use this for local calls.  */
              output_asm_insn ("ldil L'%0,%%r1", xoperands);
              if (sibcall)
                output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
              else
                {
                  if (TARGET_PA_20)
                    output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
                                     xoperands);
                  else
                    output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);

                  output_asm_insn ("copy %%r31,%%r2", xoperands);
                  delay_slot_filled = 1;
                }
            }
          else
            {
              if ((TARGET_SOM && TARGET_LONG_PIC_SDIFF_CALL)
                  || (TARGET_64BIT && !TARGET_GAS))
                {
                  /* The HP assembler and linker can handle relocations
                     for the difference of two symbols.  GAS and the HP
                     linker can't do this when one of the symbols is
                     external.  */
                  xoperands[1] = gen_label_rtx ();
                  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
                  output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
                  (*targetm.asm_out.internal_label) (asm_out_file, "L",
                                             CODE_LABEL_NUMBER (xoperands[1]));
                  output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
                }
              else if (TARGET_GAS && !TARGET_SOM
                       && (TARGET_LONG_PIC_PCREL_CALL || local_call))
                {
                  /*  GAS currently can't generate the relocations that
                      are needed for the SOM linker under HP-UX using this
                      sequence.  The GNU linker doesn't generate the stubs
                      that are needed for external calls on TARGET_ELF32
                      with this sequence.  For now, we have to use a
                      longer plabel sequence when using GAS.  */
                  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
                  output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
                                   xoperands);
                  output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
                                   xoperands);
                }
              else
                {
                  /* Emit a long plabel-based call sequence.  This is
                     essentially an inline implementation of $$dyncall.
                     We don't actually try to call $$dyncall as this is
                     as difficult as calling the function itself.  */
                  xoperands[0] = get_deferred_plabel (call_dest);
                  xoperands[1] = gen_label_rtx ();

                  /* Since the call is indirect, FP arguments in registers
                     need to be copied to the general registers.  Then, the
                     argument relocation stub will copy them back.  */
                  if (TARGET_SOM)
                    copy_fp_args (insn);

                  if (flag_pic)
                    {
                      output_asm_insn ("addil LT'%0,%%r19", xoperands);
                      output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
                      output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
                    }
                  else
                    {
                      output_asm_insn ("addil LR'%0-$global$,%%r27",
                                       xoperands);
                      output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
                                       xoperands);
                    }

                  output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
                  output_asm_insn ("depi 0,31,2,%%r1", xoperands);
                  output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
                  output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);

                  if (!sibcall && !TARGET_PA_20)
                    {
                      output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
                      if (TARGET_NO_SPACE_REGS)
                        output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
                      else
                        output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
                    }
                }

              if (TARGET_PA_20)
                {
                  if (sibcall)
                    output_asm_insn ("bve (%%r1)", xoperands);
                  else
                    {
                      if (indirect_call)
                        {
                          output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
                          output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
                          delay_slot_filled = 1;
                        }
                      else
                        output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
                    }
                }
              else
                {
                  if (!TARGET_NO_SPACE_REGS)
                    output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
                                     xoperands);

                  if (sibcall)
                    {
                      if (TARGET_NO_SPACE_REGS)
                        output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
                      else
                        output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
                    }
                  else
                    {
                      if (TARGET_NO_SPACE_REGS)
                        output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
                      else
                        output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);

                      if (indirect_call)
                        output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
                      else
                        output_asm_insn ("copy %%r31,%%r2", xoperands);
                      delay_slot_filled = 1;
                    }
                }
            }
        }
    }

  if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
    output_asm_insn ("nop", xoperands);

  /* We are done if there isn't a jump in the delay slot.  */
  if (seq_length == 0
      || delay_insn_deleted
      || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
    return "";

  /* A sibcall should never have a branch in the delay slot.  */
  gcc_assert (!sibcall);

  /* This call has an unconditional jump in its delay slot.  */
  xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);

  if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
    {
      /* See if the return address can be adjusted.  Use the containing
         sequence insn's address.  */
      rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
      int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
                      - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);

      if (VAL_14_BITS_P (distance))
        {
          xoperands[1] = gen_label_rtx ();
          output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
          (*targetm.asm_out.internal_label) (asm_out_file, "L",
                                             CODE_LABEL_NUMBER (xoperands[1]));
        }
      else
        output_asm_insn ("nop\n\tb,n %0", xoperands);
    }
  else
    output_asm_insn ("b,n %0", xoperands);

  /* Delete the jump.  */
  PUT_CODE (NEXT_INSN (insn), NOTE);
  NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
  NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;

  return "";
}
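
/* Illustrative sketch (editorial addition): in the short-reach case
   handled first above, a plain call compiles to just

       bl foo,%r2
       nop

   with the nop emitted only when the delay slot could not be filled.
   The symbol "foo" is a placeholder for the actual call target.  */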

/* Return the attribute length of the indirect call instruction INSN.
   The length must match the code generated by output_indirect_call.
   The returned length includes the delay slot.  Currently, the delay
   slot of an indirect call sequence is not exposed and it is used by
   the sequence itself.  */

int
attr_length_indirect_call (rtx insn)
{
  unsigned long distance = -1;
  unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;

  if (INSN_ADDRESSES_SET_P ())
    {
      distance = (total + insn_current_reference_address (insn));
      if (distance < total)
        distance = -1;
    }

  if (TARGET_64BIT)
    return 12;

  if (TARGET_FAST_INDIRECT_CALLS
      || (!TARGET_PORTABLE_RUNTIME
          && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
              || distance < 240000)))
    return 8;

  if (flag_pic)
    return 24;

  if (TARGET_PORTABLE_RUNTIME)
    return 20;

  /* Out of reach, can use ble.  */
  return 12;
}
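
/* Worked example (editorial addition): the 12 bytes returned for
   TARGET_64BIT above correspond to the three instructions emitted by
   output_indirect_call for that case, e.g. with the function pointer
   in %r22:

       ldd 16(%r22),%r2
       bve,l (%r2),%r2
       ldd 24(%r22),%r27  */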

const char *
output_indirect_call (rtx insn, rtx call_dest)
{
  rtx xoperands[1];

  if (TARGET_64BIT)
    {
      xoperands[0] = call_dest;
      output_asm_insn ("ldd 16(%0),%%r2", xoperands);
      output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
      return "";
    }

  /* First the special case for kernels, level 0 systems, etc.  */
  if (TARGET_FAST_INDIRECT_CALLS)
    return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";

  /* Now the normal case -- we can reach $$dyncall directly or
     we're sure that we can get there via a long-branch stub.

     No need to check target flags as the length uniquely identifies
     the remaining cases.  */
  if (attr_length_indirect_call (insn) == 8)
    {
      /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
         $$dyncall.  Since BLE uses %r31 as the link register, the 22-bit
         variant of the B,L instruction can't be used on the SOM target.  */
      if (TARGET_PA_20 && !TARGET_SOM)
        return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
      else
        return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
    }

  /* Long millicode call, but we are not generating PIC or portable runtime
     code.  */
  if (attr_length_indirect_call (insn) == 12)
    return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";

  /* Long millicode call for portable runtime.  */
  if (attr_length_indirect_call (insn) == 20)
    return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";

  /* We need a long PIC call to $$dyncall.  */
  xoperands[0] = NULL_RTX;
  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
  if (TARGET_SOM || !TARGET_GAS)
    {
      xoperands[0] = gen_label_rtx ();
      output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
      (*targetm.asm_out.internal_label) (asm_out_file, "L",
                                         CODE_LABEL_NUMBER (xoperands[0]));
      output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
    }
  else
    {
      output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
      output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
                       xoperands);
    }
  output_asm_insn ("blr %%r0,%%r2", xoperands);
  output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
  return "";
}
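
/* Illustrative sketch (editorial addition): with flag_pic and GAS on a
   non-SOM (ELF32) target, the 24-byte long PIC case above comes out
   roughly as

       b,l .+8,%r1
       addil L'$$dyncall-$PIC_pcrel$0+4,%r1
       ldo R'$$dyncall-$PIC_pcrel$0+8(%r1),%r1
       blr %r0,%r2
       bv,n %r0(%r1)
       nop

   matching the 24 returned by attr_length_indirect_call.  */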

/* Return the total length of the save and restore instructions needed for
   the data linkage table pointer (i.e., the PIC register) across the call
   instruction INSN.  No-return calls do not require a save and restore.
   In addition, we may be able to avoid the save and restore for calls
   within the same translation unit.  */

int
attr_length_save_restore_dltp (rtx insn)
{
  if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
    return 0;

  return 8;
}

/* In HPUX 8.0's shared library scheme, special relocations are needed
   for function labels if they might be passed to a function
   in a shared library (because shared libraries don't live in code
   space), and special magic is needed to construct their address.  */

void
hppa_encode_label (rtx sym)
{
  const char *str = XSTR (sym, 0);
  int len = strlen (str) + 1;
  char *newstr, *p;

  p = newstr = alloca (len + 1);
  *p++ = '@';
  strcpy (p, str);

  XSTR (sym, 0) = ggc_alloc_string (newstr, len);
}

static void
pa_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (first && TEXT_SPACE_P (decl))
    {
      SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
      if (TREE_CODE (decl) == FUNCTION_DECL)
        hppa_encode_label (XEXP (rtl, 0));
    }
}

/* This is sort of inverse to pa_encode_section_info.  */

static const char *
pa_strip_name_encoding (const char *str)
{
  str += (*str == '@');
  str += (*str == '*');
  return str;
}
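
/* Example (editorial addition): for a function "foo", hppa_encode_label
   rewrites the symbol name to "@foo", and pa_strip_name_encoding maps
   "@foo" (or "*foo") back to "foo".  */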

int
function_label_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0));
}

/* Returns 1 if OP is a function label involved in a simple addition
   with a constant.  Used to keep certain patterns from matching
   during instruction combination.  */
int
is_function_label_plus_const (rtx op)
{
  /* Strip off any CONST.  */
  if (GET_CODE (op) == CONST)
    op = XEXP (op, 0);

  return (GET_CODE (op) == PLUS
          && function_label_operand (XEXP (op, 0), Pmode)
          && GET_CODE (XEXP (op, 1)) == CONST_INT);
}
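
/* Example (editorial addition): an RTX of the form
   (const (plus (symbol_ref "@foo") (const_int 4))) makes
   is_function_label_plus_const return 1, since the "@" prefix added by
   hppa_encode_label marks "@foo" as a function label.  */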

/* Output assembly code for a thunk to FUNCTION.  */

static void
pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
                        HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
                        tree function)
{
  static unsigned int current_thunk_number;
  int val_14 = VAL_14_BITS_P (delta);
  int nbytes = 0;
  char label[16];
  rtx xoperands[4];

  xoperands[0] = XEXP (DECL_RTL (function), 0);
  xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
  xoperands[2] = GEN_INT (delta);

  ASM_OUTPUT_LABEL (file, XSTR (xoperands[1], 0));
  fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");

  /* Output the thunk.  We know that the function is in the same
     translation unit (i.e., the same space) as the thunk, and that
     thunks are output after their method.  Thus, we don't need an
     external branch to reach the function.  With SOM and GAS,
     functions and thunks are effectively in different sections.
     Thus, we can always use an IA-relative branch and the linker
     will add a long branch stub if necessary.

     However, we have to be careful when generating PIC code on the
     SOM port to ensure that the sequence does not transfer to an
     import stub for the target function as this could clobber the
     return value saved at SP-24.  This would also apply to the
     32-bit linux port if the multi-space model is implemented.  */
  if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
       && !(flag_pic && TREE_PUBLIC (function))
       && (TARGET_GAS || last_address < 262132))
      || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
          && ((targetm.have_named_sections
               && DECL_SECTION_NAME (thunk_fndecl) != NULL
               /* The GNU 64-bit linker has rather poor stub management.
                  So, we use a long branch from thunks that aren't in
                  the same section as the target function.  */
               && ((!TARGET_64BIT
                    && (DECL_SECTION_NAME (thunk_fndecl)
                        != DECL_SECTION_NAME (function)))
                   || ((DECL_SECTION_NAME (thunk_fndecl)
                        == DECL_SECTION_NAME (function))
                       && last_address < 262132)))
              || (!targetm.have_named_sections && last_address < 262132))))
    {
      if (!val_14)
        output_asm_insn ("addil L'%2,%%r26", xoperands);

      output_asm_insn ("b %0", xoperands);

      if (val_14)
        {
          output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
          nbytes += 8;
        }
      else
        {
          output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
          nbytes += 12;
        }
    }
  else if (TARGET_64BIT)
    {
      /* We only have one call-clobbered scratch register, so we can't
         make use of the delay slot if delta doesn't fit in 14 bits.  */
      if (!val_14)
        {
          output_asm_insn ("addil L'%2,%%r26", xoperands);
          output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
        }

      output_asm_insn ("b,l .+8,%%r1", xoperands);

      if (TARGET_GAS)
        {
          output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
          output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
        }
      else
        {
          xoperands[3] = GEN_INT (val_14 ? 8 : 16);
          output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
        }

      if (val_14)
        {
          output_asm_insn ("bv %%r0(%%r1)", xoperands);
          output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
          nbytes += 20;
        }
      else
        {
          output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
          nbytes += 24;
        }
    }
  else if (TARGET_PORTABLE_RUNTIME)
    {
      output_asm_insn ("ldil L'%0,%%r1", xoperands);
      output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);

      if (!val_14)
        output_asm_insn ("addil L'%2,%%r26", xoperands);

      output_asm_insn ("bv %%r0(%%r22)", xoperands);

      if (val_14)
        {
          output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
          nbytes += 16;
        }
      else
        {
          output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
          nbytes += 20;
        }
    }
  else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
    {
      /* The function is accessible from outside this module.  The only
         way to avoid an import stub between the thunk and function is to
         call the function directly with an indirect sequence similar to
         that used by $$dyncall.  This is possible because $$dyncall acts
         as the import stub in an indirect call.  */
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
      xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
      output_asm_insn ("addil LT'%3,%%r19", xoperands);
      output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
      output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
      output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
      output_asm_insn ("depi 0,31,2,%%r22", xoperands);
      output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
      output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);

      if (!val_14)
        {
          output_asm_insn ("addil L'%2,%%r26", xoperands);
          nbytes += 4;
        }

      if (TARGET_PA_20)
        {
          output_asm_insn ("bve (%%r22)", xoperands);
          nbytes += 36;
        }
      else if (TARGET_NO_SPACE_REGS)
        {
          output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
          nbytes += 36;
        }
      else
        {
          output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
          output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
          output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
          nbytes += 44;
        }

      if (val_14)
        output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
      else
        output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
    }
  else if (flag_pic)
    {
      output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);

      if (TARGET_SOM || !TARGET_GAS)
        {
          output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
          output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
        }
      else
        {
          output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
          output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
        }

      if (!val_14)
        output_asm_insn ("addil L'%2,%%r26", xoperands);

      output_asm_insn ("bv %%r0(%%r22)", xoperands);

      if (val_14)
        {
          output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
          nbytes += 20;
        }
      else
        {
          output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
          nbytes += 24;
        }
    }
  else
    {
      if (!val_14)
        output_asm_insn ("addil L'%2,%%r26", xoperands);

      output_asm_insn ("ldil L'%0,%%r22", xoperands);
      output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);

      if (val_14)
        {
          output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
          nbytes += 12;
        }
      else
        {
          output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
          nbytes += 16;
        }
    }

  fprintf (file, "\t.EXIT\n\t.PROCEND\n");

  if (TARGET_SOM && TARGET_GAS)
    {
      /* We're done with this subspace except possibly for some additional
         debug information.  Forget that we are in this subspace to ensure
         that the next function is output in its own subspace.  */
      in_section = NULL;
      cfun->machine->in_nsubspa = 2;
    }

  if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
    {
      switch_to_section (data_section);
      output_asm_insn (".align 4", xoperands);
      ASM_OUTPUT_LABEL (file, label);
      output_asm_insn (".word P'%0", xoperands);
    }

  current_thunk_number++;
  nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
            & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
  last_address += nbytes;
  update_total_code_bytes (nbytes);
}
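
/* Illustrative sketch (editorial addition): in the simplest case above
   (a reachable PC-relative branch with DELTA fitting in 14 bits), the
   whole thunk body is just

       b foo
       ldo delta(%r26),%r26

   adjusting the "this" pointer in %r26 from the delay slot, matching
   the nbytes += 8 accounting.  "foo" and "delta" stand for the target
   function and the byte offset.  */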

/* Only direct calls to static functions are allowed to be sibling (tail)
   call optimized.

   This restriction is necessary because some linker generated stubs will
   store return pointers into rp' in some cases which might clobber a
   live value already in rp'.

   In a sibcall the current function and the target function share stack
   space.  Thus if the path to the current function and the path to the
   target function save a value in rp', they save the value into the
   same stack slot, which has undesirable consequences.

   Because of the deferred binding nature of shared libraries any function
   with external scope could be in a different load module and thus require
   rp' to be saved when calling that function.  So sibcall optimizations
   can only be safe for static functions.

   Note that GCC never needs return value relocations, so we don't have to
   worry about static calls with return value relocations (which require
   saving rp').

   It is safe to perform a sibcall optimization when the target function
   will never return.  */
static bool
pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  if (TARGET_PORTABLE_RUNTIME)
    return false;

  /* Sibcalls are ok for TARGET_ELF32 as long as the linker is used in
     single subspace mode and the call is not indirect.  As far as I know,
     there is no operating system support for the multiple subspace mode.
     It might be possible to support indirect calls if we didn't use
     $$dyncall (see the indirect sequence generated in output_call).  */
  if (TARGET_ELF32)
    return (decl != NULL_TREE);

  /* Sibcalls are not ok because the arg pointer register is not a fixed
     register.  This prevents the sibcall optimization from occurring.  In
     addition, there are problems with stub placement using GNU ld.  This
     is because a normal sibcall branch uses a 17-bit relocation while
     a regular call branch uses a 22-bit relocation.  As a result, more
     care needs to be taken in the placement of long-branch stubs.  */
  if (TARGET_64BIT)
    return false;

  /* Sibcalls are only ok within a translation unit.  */
  return (decl && !TREE_PUBLIC (decl));
}

/* ??? Addition is not commutative on the PA due to the weird implicit
   space register selection rules for memory addresses.  Therefore, we
   don't consider a + b == b + a, as this might be inside a MEM.  */
static bool
pa_commutative_p (rtx x, int outer_code)
{
  return (COMMUTATIVE_P (x)
          && (TARGET_NO_SPACE_REGS
              || (outer_code != UNKNOWN && outer_code != MEM)
              || GET_CODE (x) != PLUS));
}
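
/* Example (editorial addition): with space registers enabled, a
   (plus (reg %r26) (reg %r25)) appearing inside a MEM is not treated
   as commutative here, since swapping the operands could select a
   different space register for the access.  */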

/* Returns 1 if the 6 operands specified in OPERANDS are suitable for
   use in fmpyadd instructions.  */
int
fmpyaddoperands (rtx *operands)
{
  enum machine_mode mode = GET_MODE (operands[0]);

  /* Must be a floating point mode.  */
  if (mode != SFmode && mode != DFmode)
    return 0;

  /* All modes must be the same.  */
  if (! (mode == GET_MODE (operands[1])
         && mode == GET_MODE (operands[2])
         && mode == GET_MODE (operands[3])
         && mode == GET_MODE (operands[4])
         && mode == GET_MODE (operands[5])))
    return 0;

  /* All operands must be registers.  */
  if (! (GET_CODE (operands[1]) == REG
         && GET_CODE (operands[2]) == REG
         && GET_CODE (operands[3]) == REG
         && GET_CODE (operands[4]) == REG
         && GET_CODE (operands[5]) == REG))
    return 0;

  /* Only 2 real operands to the addition.  One of the input operands must
     be the same as the output operand.  */
  if (! rtx_equal_p (operands[3], operands[4])
      && ! rtx_equal_p (operands[3], operands[5]))
    return 0;

  /* Inout operand of add cannot conflict with any operands from multiply.  */
  if (rtx_equal_p (operands[3], operands[0])
     || rtx_equal_p (operands[3], operands[1])
     || rtx_equal_p (operands[3], operands[2]))
    return 0;

  /* multiply cannot feed into addition operands.  */
  if (rtx_equal_p (operands[4], operands[0])
      || rtx_equal_p (operands[5], operands[0]))
    return 0;

  /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
  if (mode == SFmode
      && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
    return 0;

  /* Passed.  Operands are suitable for fmpyadd.  */
  return 1;
}
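
/* Example (editorial addition): for DFmode, a multiply t = x * y and an
   add a = a + b can fuse into one fmpyadd only when the two operations
   are independent as checked above; if the add's inout register "a"
   were also the multiply's result "t", the conflict tests would reject
   the pair.  */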

#if !defined(USE_COLLECT2)
static void
pa_asm_out_constructor (rtx symbol, int priority)
{
  if (!function_label_operand (symbol, VOIDmode))
    hppa_encode_label (symbol);

#ifdef CTORS_SECTION_ASM_OP
  default_ctor_section_asm_out_constructor (symbol, priority);
#else
# ifdef TARGET_ASM_NAMED_SECTION
  default_named_section_asm_out_constructor (symbol, priority);
# else
  default_stabs_asm_out_constructor (symbol, priority);
# endif
#endif
}

static void
pa_asm_out_destructor (rtx symbol, int priority)
{
  if (!function_label_operand (symbol, VOIDmode))
    hppa_encode_label (symbol);

#ifdef DTORS_SECTION_ASM_OP
  default_dtor_section_asm_out_destructor (symbol, priority);
#else
# ifdef TARGET_ASM_NAMED_SECTION
  default_named_section_asm_out_destructor (symbol, priority);
# else
  default_stabs_asm_out_destructor (symbol, priority);
# endif
#endif
}
#endif

/* This function places uninitialized global data in the bss section.
   The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
   function on the SOM port to prevent uninitialized global data from
   being placed in the data section.  */

void
pa_asm_output_aligned_bss (FILE *stream,
                           const char *name,
                           unsigned HOST_WIDE_INT size,
                           unsigned int align)
{
  switch_to_section (bss_section);
  fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);

#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
  ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
#endif

#ifdef ASM_OUTPUT_SIZE_DIRECTIVE
  ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
#endif

  fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
  ASM_OUTPUT_LABEL (stream, name);
  fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
}

/* Both the HP and GNU assemblers under HP-UX provide a .comm directive
   that doesn't allow the alignment of global common storage to be directly
   specified.  The SOM linker aligns common storage based on the rounded
   value of the NUM_BYTES parameter in the .comm directive.  It's not
   possible to use the .align directive as it doesn't affect the alignment
   of the label associated with a .comm directive.  */

void
pa_asm_output_aligned_common (FILE *stream,
                              const char *name,
                              unsigned HOST_WIDE_INT size,
                              unsigned int align)
{
  unsigned int max_common_align;

  max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
  if (align > max_common_align)
    {
      warning (0, "alignment (%u) for %s exceeds maximum alignment "
               "for global common data.  Using %u",
               align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
      align = max_common_align;
    }

  switch_to_section (bss_section);

  assemble_name (stream, name);
  fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n",
           MAX (size, align / BITS_PER_UNIT));
}
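
/* Worked example (editorial addition): for a 4-byte object "sym" that
   needs 8-byte (64-bit) alignment on a 32-bit target, the code above
   emits "sym\t.comm 8"; padding NUM_BYTES up to
   MAX (size, align / BITS_PER_UNIT) = 8 makes the SOM linker align the
   storage to 8 bytes.  */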

/* We can't use .comm for local common storage as the SOM linker effectively
   treats the symbol as universal and uses the same storage for local symbols
   with the same name in different object files.  The .block directive
   reserves an uninitialized block of storage.  However, it's not common
   storage.  Fortunately, GCC never requests common storage with the same
   name in any given translation unit.  */

void
pa_asm_output_aligned_local (FILE *stream,
                             const char *name,
                             unsigned HOST_WIDE_INT size,
                             unsigned int align)
{
  switch_to_section (bss_section);
  fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);

#ifdef LOCAL_ASM_OP
  fprintf (stream, "%s", LOCAL_ASM_OP);
  assemble_name (stream, name);
  fprintf (stream, "\n");
#endif

  ASM_OUTPUT_LABEL (stream, name);
  fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
}

/* Returns 1 if the 6 operands specified in OPERANDS are suitable for
   use in fmpysub instructions.  */
int
fmpysuboperands (rtx *operands)
{
  enum machine_mode mode = GET_MODE (operands[0]);

  /* Must be a floating point mode.  */
  if (mode != SFmode && mode != DFmode)
    return 0;

  /* All modes must be the same.  */
  if (! (mode == GET_MODE (operands[1])
         && mode == GET_MODE (operands[2])
         && mode == GET_MODE (operands[3])
         && mode == GET_MODE (operands[4])
         && mode == GET_MODE (operands[5])))
    return 0;

  /* All operands must be registers.  */
  if (! (GET_CODE (operands[1]) == REG
         && GET_CODE (operands[2]) == REG
         && GET_CODE (operands[3]) == REG
         && GET_CODE (operands[4]) == REG
         && GET_CODE (operands[5]) == REG))
    return 0;

  /* Only 2 real operands to the subtraction.  Subtraction is not a commutative
     operation, so operands[4] must be the same as operands[3].  */
  if (! rtx_equal_p (operands[3], operands[4]))
    return 0;

  /* multiply cannot feed into subtraction.  */
  if (rtx_equal_p (operands[5], operands[0]))
    return 0;

  /* Inout operand of sub cannot conflict with any operands from multiply.  */
  if (rtx_equal_p (operands[3], operands[0])
     || rtx_equal_p (operands[3], operands[1])
     || rtx_equal_p (operands[3], operands[2]))
    return 0;

  /* SFmode limits the registers to the upper 32 of the 32bit FP regs.  */
  if (mode == SFmode
      && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
          || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
    return 0;

  /* Passed.  Operands are suitable for fmpysub.  */
  return 1;
}

/* Return 1 if the given constant is 2, 4, or 8.  These are the valid
   constants for shadd instructions.  */
int
shadd_constant_p (int val)
{
  if (val == 2 || val == 4 || val == 8)
    return 1;
  else
    return 0;
}

/* Return 1 if OP is valid as a base or index register in a
   REG+REG address.  */

int
borx_reg_operand (rtx op, enum machine_mode mode)
{
  if (GET_CODE (op) != REG)
    return 0;

  /* We must reject virtual registers as the only expressions that
     can be instantiated are REG and REG+CONST.  */
  if (op == virtual_incoming_args_rtx
      || op == virtual_stack_vars_rtx
      || op == virtual_stack_dynamic_rtx
      || op == virtual_outgoing_args_rtx
      || op == virtual_cfa_rtx)
    return 0;

  /* While it's always safe to index off the frame pointer, it's not
     profitable to do so when the frame pointer is being eliminated.  */
  if (!reload_completed
      && flag_omit_frame_pointer
      && !current_function_calls_alloca
      && op == frame_pointer_rtx)
    return 0;

  return register_operand (op, mode);
}

/* Return 1 if this operand is anything other than a hard register.  */

int
non_hard_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER);
}

/* Return 1 if INSN branches forward.  Should be using insn_addresses
   to avoid walking through all the insns...  */
static int
forward_branch_p (rtx insn)
{
  rtx label = JUMP_LABEL (insn);

  while (insn)
    {
      if (insn == label)
        break;
      else
        insn = NEXT_INSN (insn);
    }

  return (insn == label);
}

/* Return 1 if OP is an equality comparison, else return 0.  */
int
eq_neq_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return (GET_CODE (op) == EQ || GET_CODE (op) == NE);
}

/* Return 1 if INSN is in the delay slot of a call instruction.  */
int
jump_in_call_delay (rtx insn)
{

  if (GET_CODE (insn) != JUMP_INSN)
    return 0;

  if (PREV_INSN (insn)
      && PREV_INSN (PREV_INSN (insn))
      && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
    {
      rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));

      return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
              && XVECEXP (PATTERN (test_insn), 0, 1) == insn);

    }
  else
    return 0;
}

/* Output an unconditional move and branch insn.  */

const char *
output_parallel_movb (rtx *operands, rtx insn)
{
  int length = get_attr_length (insn);

  /* These are the cases in which we win.  */
  if (length == 4)
    return "mov%I1b,tr %1,%0,%2";

  /* None of the following cases win, but they don't lose either.  */
  if (length == 8)
    {
      if (dbr_sequence_length () == 0)
        {
          /* Nothing in the delay slot, fake it by putting the combined
             insn (the copy or add) in the delay slot of a bl.  */
          if (GET_CODE (operands[1]) == CONST_INT)
            return "b %2\n\tldi %1,%0";
          else
            return "b %2\n\tcopy %1,%0";
        }
      else
        {
          /* Something in the delay slot, but we've got a long branch.  */
          if (GET_CODE (operands[1]) == CONST_INT)
            return "ldi %1,%0\n\tb %2";
          else
            return "copy %1,%0\n\tb %2";
        }
    }

  if (GET_CODE (operands[1]) == CONST_INT)
    output_asm_insn ("ldi %1,%0", operands);
  else
    output_asm_insn ("copy %1,%0", operands);
  return output_lbranch (operands[2], insn, 1);
}
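
/* Illustrative sketch (editorial addition): when the branch target is
   in reach (length == 4), a register copy plus an unconditional jump
   to a hypothetical label L$0040 collapses into the single insn

       movb,tr %r4,%r3,L$0040

   which copies %r4 to %r3 and branches in one go.  */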

/* Output an unconditional add and branch insn.  */

const char *
output_parallel_addb (rtx *operands, rtx insn)
{
  int length = get_attr_length (insn);

  /* To make life easy we want operand0 to be the shared input/output
     operand and operand1 to be the readonly operand.  */
  if (operands[0] == operands[1])
    operands[1] = operands[2];

  /* These are the cases in which we win.  */
  if (length == 4)
    return "add%I1b,tr %1,%0,%3";

  /* None of the following cases win, but they don't lose either.  */
  if (length == 8)
    {
      if (dbr_sequence_length () == 0)
        /* Nothing in the delay slot, fake it by putting the combined
           insn (the copy or add) in the delay slot of a bl.  */
        return "b %3\n\tadd%I1 %1,%0,%0";
      else
        /* Something in the delay slot, but we've got a long branch.  */
        return "add%I1 %1,%0,%0\n\tb %3";
    }

  output_asm_insn ("add%I1 %1,%0,%0", operands);
  return output_lbranch (operands[3], insn, 1);
}

/* Return nonzero if INSN (a jump insn) immediately follows a call
   to a named function.  This is used to avoid filling the delay slot
   of the jump since it can usually be eliminated by modifying RP in
   the delay slot of the call.  */

int
following_call (rtx insn)
{
  if (! TARGET_JUMP_IN_DELAY)
    return 0;

  /* Find the previous real insn, skipping NOTEs.  */
  insn = PREV_INSN (insn);
  while (insn && GET_CODE (insn) == NOTE)
    insn = PREV_INSN (insn);

  /* Check for CALL_INSNs and millicode calls.  */
  if (insn
      && ((GET_CODE (insn) == CALL_INSN
           && get_attr_type (insn) != TYPE_DYNCALL)
          || (GET_CODE (insn) == INSN
              && GET_CODE (PATTERN (insn)) != SEQUENCE
              && GET_CODE (PATTERN (insn)) != USE
              && GET_CODE (PATTERN (insn)) != CLOBBER
              && get_attr_type (insn) == TYPE_MILLI)))
    return 1;

  return 0;
}

/* We use this hook to perform a PA specific optimization which is difficult
   to do in earlier passes.

   We want the delay slots of branches within jump tables to be filled.
   None of the compiler passes at the moment even has the notion that a
   PA jump table doesn't contain addresses, but instead contains actual
   instructions!

   Because we actually jump into the table, the addresses of each entry
   must stay constant in relation to the beginning of the table (which
   itself must stay constant relative to the instruction to jump into
   it).  I don't believe we can guarantee earlier passes of the compiler
   will adhere to those rules.

   So, late in the compilation process we find all the jump tables, and
   expand them into real code -- e.g. each entry in the jump table vector
   will get an appropriate label followed by a jump to the final target.

   Reorg and the final jump pass can then optimize these branches and
   fill their delay slots.  We end up with smaller, more efficient code.

   The jump instructions within the table are special; we must be able
   to identify them during assembly output (if the jumps don't get filled
   we need to emit a nop rather than nullifying the delay slot).  We
   identify jumps in switch tables by using insns with the attribute
   type TYPE_BTABLE_BRANCH.

   We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
   insns.  This serves two purposes, first it prevents jump.c from
   noticing that the last N entries in the table jump to the instruction
   immediately after the table and deleting the jumps.  Second, those
   insns mark where we should emit .begin_brtab and .end_brtab directives
   when using GAS (allows for better link time optimizations).  */

static void
pa_reorg (void)
{
  rtx insn;

  remove_useless_addtr_insns (1);

  if (pa_cpu < PROCESSOR_8000)
    pa_combine_instructions ();


  /* This is fairly cheap, so always run it if optimizing.  */
  if (optimize > 0 && !TARGET_BIG_SWITCH)
    {
      /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns.  */
      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
        {
          rtx pattern, tmp, location, label;
          unsigned int length, i;

          /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode.  */
          if (GET_CODE (insn) != JUMP_INSN
              || (GET_CODE (PATTERN (insn)) != ADDR_VEC
                  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
            continue;

          /* Emit marker for the beginning of the branch table.  */
          emit_insn_before (gen_begin_brtab (), insn);

          pattern = PATTERN (insn);
          location = PREV_INSN (insn);
          length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);

          for (i = 0; i < length; i++)
            {
              /* Emit a label before each jump to keep jump.c from
                 removing this code.  */
              tmp = gen_label_rtx ();
              LABEL_NUSES (tmp) = 1;
              emit_label_after (tmp, location);
              location = NEXT_INSN (location);

              if (GET_CODE (pattern) == ADDR_VEC)
                label = XEXP (XVECEXP (pattern, 0, i), 0);
              else
                label = XEXP (XVECEXP (pattern, 1, i), 0);

              tmp = gen_short_jump (label);

              /* Emit the jump itself.  */
              tmp = emit_jump_insn_after (tmp, location);
              JUMP_LABEL (tmp) = label;
              LABEL_NUSES (label)++;
              location = NEXT_INSN (location);

              /* Emit a BARRIER after the jump.  */
              emit_barrier_after (location);
              location = NEXT_INSN (location);
            }

          /* Emit marker for the end of the branch table.  */
          emit_insn_before (gen_end_brtab (), location);
          location = NEXT_INSN (location);
          emit_barrier_after (location);

          /* Delete the ADDR_VEC or ADDR_DIFF_VEC.  */
          delete_insn (insn);
        }
    }
  else
    {
      /* Still need brtab marker insns.  FIXME: the presence of these
         markers disables output of the branch table to readonly memory,
         and any alignment directives that might be needed.  Possibly,
         the begin_brtab insn should be output before the label for the
         table.  This doesn't matter at the moment since the tables are
         always output in the text section.  */
      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
        {
          /* Find an ADDR_VEC insn.  */
          if (GET_CODE (insn) != JUMP_INSN
              || (GET_CODE (PATTERN (insn)) != ADDR_VEC
                  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
            continue;

          /* Now generate markers for the beginning and end of the
             branch table.  */
          emit_insn_before (gen_begin_brtab (), insn);
          emit_insn_after (gen_end_brtab (), insn);
        }
    }
}
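
/* Illustrative sketch (editorial addition): after the explosion above,
   a two-entry jump table that originally held the addresses of L$1 and
   L$2 instead contains real instructions, roughly

       L$A:  b L$1
             nop
       L$B:  b L$2
             nop

   where each nop is a delay slot that reorg may later fill, keeping
   every entry the same size.  */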

/* The PA has a number of odd instructions which can perform multiple
   tasks at once.  On first generation PA machines (PA1.0 and PA1.1)
   it may be profitable to combine two instructions into one instruction
   with two outputs.  It's not profitable on PA2.0 machines because the
   two outputs would take two slots in the reorder buffers.

   This routine finds instructions which can be combined and combines
   them.  We only support some of the potential combinations, and we
   only try common ways to find suitable instructions.

      * addb can add two registers or a register and a small integer
      and jump to a nearby (+-8k) location.  Normally the jump to the
      nearby location is conditional on the result of the add, but by
      using the "true" condition we can make the jump unconditional.
      Thus addb can perform two independent operations in one insn.

      * movb is similar to addb in that it can perform a reg->reg
      or small immediate->reg copy and jump to a nearby (+-8k) location.

      * fmpyadd and fmpysub can perform a FP multiply and either an
      FP add or FP sub if the operands of the multiply and add/sub are
      independent (there are other minor restrictions).  Note both
      the fmpy and fadd/fsub can in theory move to better spots according
      to data dependencies, but for now we require the fmpy stay at a
      fixed location.

      * Many of the memory operations can perform pre & post updates
      of index registers.  GCC's pre/post increment/decrement addressing
      is far too simple to take advantage of all the possibilities.  This
      pass may not be suitable since those insns may not be independent.

      * comclr can compare two ints or an int and a register, nullify
      the following instruction and zero some other register.  This
      is more difficult to use as it's harder to find an insn which
      will generate a comclr than finding something like an unconditional
      branch.  (conditional moves & long branches create comclr insns).

      * Most arithmetic operations can conditionally skip the next
      instruction.  They can be viewed as "perform this operation
      and conditionally jump to this nearby location" (where nearby
      is an insn away).  These are difficult to use due to the
8693
      branch length restrictions.  */
8694
 
8695
static void
8696
pa_combine_instructions (void)
8697
{
8698
  rtx anchor, new;
8699
 
8700
  /* This can get expensive since the basic algorithm is on the
8701
     order of O(n^2) (or worse).  Only do it for -O2 or higher
8702
     levels of optimization.  */
8703
  if (optimize < 2)
8704
    return;
8705
 
8706
  /* Walk down the list of insns looking for "anchor" insns which
8707
     may be combined with "floating" insns.  As the name implies,
8708
     "anchor" instructions don't move, while "floating" insns may
8709
     move around.  */
8710
  new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
8711
  new = make_insn_raw (new);
8712
 
  for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
    {
      enum attr_pa_combine_type anchor_attr;
      enum attr_pa_combine_type floater_attr;

      /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
         Also ignore any special USE insns.  */
      if ((GET_CODE (anchor) != INSN
          && GET_CODE (anchor) != JUMP_INSN
          && GET_CODE (anchor) != CALL_INSN)
          || GET_CODE (PATTERN (anchor)) == USE
          || GET_CODE (PATTERN (anchor)) == CLOBBER
          || GET_CODE (PATTERN (anchor)) == ADDR_VEC
          || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
        continue;

      anchor_attr = get_attr_pa_combine_type (anchor);
      /* See if anchor is an insn suitable for combination.  */
      if (anchor_attr == PA_COMBINE_TYPE_FMPY
          || anchor_attr == PA_COMBINE_TYPE_FADDSUB
          || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
              && ! forward_branch_p (anchor)))
        {
          rtx floater;

          for (floater = PREV_INSN (anchor);
               floater;
               floater = PREV_INSN (floater))
            {
              if (GET_CODE (floater) == NOTE
                  || (GET_CODE (floater) == INSN
                      && (GET_CODE (PATTERN (floater)) == USE
                          || GET_CODE (PATTERN (floater)) == CLOBBER)))
                continue;

              /* Anything except a regular INSN will stop our search.  */
              if (GET_CODE (floater) != INSN
                  || GET_CODE (PATTERN (floater)) == ADDR_VEC
                  || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
                {
                  floater = NULL_RTX;
                  break;
                }

              /* See if FLOATER is suitable for combination with the
                 anchor.  */
              floater_attr = get_attr_pa_combine_type (floater);
              if ((anchor_attr == PA_COMBINE_TYPE_FMPY
                   && floater_attr == PA_COMBINE_TYPE_FADDSUB)
                  || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
                      && floater_attr == PA_COMBINE_TYPE_FMPY))
                {
                  /* If ANCHOR and FLOATER can be combined, then we're
                     done with this pass.  */
                  if (pa_can_combine_p (new, anchor, floater, 0,
                                        SET_DEST (PATTERN (floater)),
                                        XEXP (SET_SRC (PATTERN (floater)), 0),
                                        XEXP (SET_SRC (PATTERN (floater)), 1)))
                    break;
                }

              else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
                       && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
                {
                  if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
                    {
                      if (pa_can_combine_p (new, anchor, floater, 0,
                                            SET_DEST (PATTERN (floater)),
                                            XEXP (SET_SRC (PATTERN (floater)), 0),
                                            XEXP (SET_SRC (PATTERN (floater)), 1)))
                        break;
                    }
                  else
                    {
                      if (pa_can_combine_p (new, anchor, floater, 0,
                                            SET_DEST (PATTERN (floater)),
                                            SET_SRC (PATTERN (floater)),
                                            SET_SRC (PATTERN (floater))))
                        break;
                    }
                }
            }

          /* If we didn't find anything on the backwards scan try forwards.  */
          if (!floater
              && (anchor_attr == PA_COMBINE_TYPE_FMPY
                  || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
            {
              for (floater = anchor; floater; floater = NEXT_INSN (floater))
                {
                  if (GET_CODE (floater) == NOTE
                      || (GET_CODE (floater) == INSN
                          && (GET_CODE (PATTERN (floater)) == USE
                              || GET_CODE (PATTERN (floater)) == CLOBBER)))
                    continue;

                  /* Anything except a regular INSN will stop our search.  */
                  if (GET_CODE (floater) != INSN
                      || GET_CODE (PATTERN (floater)) == ADDR_VEC
                      || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
                    {
                      floater = NULL_RTX;
                      break;
                    }

                  /* See if FLOATER is suitable for combination with the
                     anchor.  */
                  floater_attr = get_attr_pa_combine_type (floater);
                  if ((anchor_attr == PA_COMBINE_TYPE_FMPY
                       && floater_attr == PA_COMBINE_TYPE_FADDSUB)
                      || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
                          && floater_attr == PA_COMBINE_TYPE_FMPY))
                    {
                      /* If ANCHOR and FLOATER can be combined, then we're
                         done with this pass.  */
                      if (pa_can_combine_p (new, anchor, floater, 1,
                                            SET_DEST (PATTERN (floater)),
                                            XEXP (SET_SRC (PATTERN (floater)),
                                                  0),
                                            XEXP (SET_SRC (PATTERN (floater)),
                                                  1)))
                        break;
                    }
                }
            }

          /* FLOATER will be nonzero if we found a suitable floating
             insn for combination with ANCHOR.  */
          if (floater
              && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
                  || anchor_attr == PA_COMBINE_TYPE_FMPY))
            {
              /* Emit the new instruction and delete the old anchor.  */
              emit_insn_before (gen_rtx_PARALLEL
                                (VOIDmode,
                                 gen_rtvec (2, PATTERN (anchor),
                                            PATTERN (floater))),
                                anchor);

              PUT_CODE (anchor, NOTE);
              NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
              NOTE_SOURCE_FILE (anchor) = 0;

              /* Emit a special USE insn for FLOATER, then delete
                 the floating insn.  */
              emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
              delete_insn (floater);

              continue;
            }
          else if (floater
                   && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
            {
              rtx temp;

              /* Emit the new jump instruction and delete the old anchor.  */
              temp
                = emit_jump_insn_before (gen_rtx_PARALLEL
                                         (VOIDmode,
                                          gen_rtvec (2, PATTERN (anchor),
                                                     PATTERN (floater))),
                                         anchor);

              JUMP_LABEL (temp) = JUMP_LABEL (anchor);
              PUT_CODE (anchor, NOTE);
              NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
              NOTE_SOURCE_FILE (anchor) = 0;

              /* Emit a special USE insn for FLOATER, then delete
                 the floating insn.  */
              emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
              delete_insn (floater);
              continue;
            }
        }
    }
}
 
static int
pa_can_combine_p (rtx new, rtx anchor, rtx floater, int reversed, rtx dest,
                  rtx src1, rtx src2)
{
  int insn_code_number;
  rtx start, end;

  /* Create a PARALLEL with the patterns of ANCHOR and
     FLOATER, try to recognize it, then test constraints
     for the resulting pattern.

     If the pattern doesn't match or the constraints
     aren't met, keep searching for a suitable floater
     insn.  */
  XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
  XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
  INSN_CODE (new) = -1;
  insn_code_number = recog_memoized (new);
  if (insn_code_number < 0
      || (extract_insn (new), ! constrain_operands (1)))
    return 0;

  if (reversed)
    {
      start = anchor;
      end = floater;
    }
  else
    {
      start = floater;
      end = anchor;
    }

  /* There are up to three operands to consider.  One
     output and two inputs.

     The output must not be used between FLOATER & ANCHOR
     exclusive.  The inputs must not be set between
     FLOATER and ANCHOR exclusive.  */
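
  /* Illustration (hypothetical operands): when combining
         fmpy fr4,fr5,fr6     <- FLOATER
         ...
         fadd fr7,fr8,fr9     <- ANCHOR
     DEST is fr6 and SRC1/SRC2 are fr4/fr5, so fr6 must not be used,
     and fr4/fr5 must not be set, by any insn between the two.  */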
 
  if (reg_used_between_p (dest, start, end))
    return 0;

  if (reg_set_between_p (src1, start, end))
    return 0;

  if (reg_set_between_p (src2, start, end))
    return 0;

  /* If we get here, then everything is good.  */
  return 1;
}
 
/* Return nonzero if references for INSN are delayed.

   Millicode insns are actually function calls with some special
   constraints on arguments and register usage.

   Millicode calls always expect their arguments in the integer argument
   registers, and always return their result in %r29 (ret1).  They
   are expected to clobber their arguments, %r1, %r29, and the return
   pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.

   This function tells reorg that the references to arguments and
   millicode calls do not appear to happen until after the millicode call.
   This allows reorg to put insns which set the argument registers into the
   delay slot of the millicode call -- thus they act more like traditional
   CALL_INSNs.

   Note we cannot consider side effects of the insn to be delayed because
   the branch and link insn will clobber the return pointer.  If we happened
   to use the return pointer in the delay slot of the call, then we lose.

   get_attr_type will try to recognize the given insn, so make sure to
   filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
   in particular.  */
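
/* For example (illustrative; $$mulI is the conventional PA millicode
   multiply routine), a 32-bit integer multiply becomes a millicode call:

        bl $$mulI,%r31      ; result comes back in %r29
        ldi 42,%r25         ; delay slot still sets up an argument

   Filling the delay slot with argument setup is only safe because this
   hook tells reorg that the argument references are delayed.  */
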
int
insn_refs_are_delayed (rtx insn)
{
  return ((GET_CODE (insn) == INSN
           && GET_CODE (PATTERN (insn)) != SEQUENCE
           && GET_CODE (PATTERN (insn)) != USE
           && GET_CODE (PATTERN (insn)) != CLOBBER
           && get_attr_type (insn) == TYPE_MILLI));
}
 
/* On the HP-PA the value is found in register(s) 28(-29), unless
   the mode is SF or DF.  Then the value is returned in fr4 (32).

   This must perform the same promotions as PROMOTE_MODE, else
   TARGET_PROMOTE_FUNCTION_RETURN will not work correctly.

   Small structures must be returned in a PARALLEL on PA64 in order
   to match the HP Compiler ABI.  */

rtx
function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
{
  enum machine_mode valmode;

  if (AGGREGATE_TYPE_P (valtype)
      || TREE_CODE (valtype) == COMPLEX_TYPE
      || TREE_CODE (valtype) == VECTOR_TYPE)
    {
      if (TARGET_64BIT)
        {
          /* Aggregates with a size less than or equal to 128 bits are
             returned in GR 28(-29).  They are left justified.  The pad
             bits are undefined.  Larger aggregates are returned in
             memory.  */
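          /* E.g. (illustrative): a 16-byte struct is returned as
             (parallel [(expr_list (reg:DI 28) (const_int 0))
                        (expr_list (reg:DI 29) (const_int 8))]).  */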
          rtx loc[2];
          int i, offset = 0;
          int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;

          for (i = 0; i < ub; i++)
            {
              loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                          gen_rtx_REG (DImode, 28 + i),
                                          GEN_INT (offset));
              offset += 8;
            }

          return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
        }
      else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
        {
          /* Aggregates 5 to 8 bytes in size are returned in general
             registers r28-r29 in the same manner as other non
             floating-point objects.  The data is right-justified and
             zero-extended to 64 bits.  This is opposite to the normal
             justification used on big endian targets and requires
             special treatment.  */
          rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
                                       gen_rtx_REG (DImode, 28), const0_rtx);
          return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
        }
    }

  if ((INTEGRAL_TYPE_P (valtype)
       && TYPE_PRECISION (valtype) < BITS_PER_WORD)
      || POINTER_TYPE_P (valtype))
    valmode = word_mode;
  else
    valmode = TYPE_MODE (valtype);

  if (TREE_CODE (valtype) == REAL_TYPE
      && !AGGREGATE_TYPE_P (valtype)
      && TYPE_MODE (valtype) != TFmode
      && !TARGET_SOFT_FLOAT)
    return gen_rtx_REG (valmode, 32);

  return gen_rtx_REG (valmode, 28);
}
 
/* Return the location of a parameter that is passed in a register or NULL
   if the parameter has any component that is passed in memory.

   This is new code and will be pushed into the net sources after
   further testing.

   ??? We might want to restructure this so that it looks more like other
   ports.  */
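
/* E.g. (illustrative): for the second integer argument of a 32-bit
   call f (int, int), cum->words is 1 and arg_size is 1, so this
   routine returns (reg:SI 25), i.e. %r25.  */
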
rtx
function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
              int named ATTRIBUTE_UNUSED)
{
  int max_arg_words = (TARGET_64BIT ? 8 : 4);
  int alignment = 0;
  int arg_size;
  int fpr_reg_base;
  int gpr_reg_base;
  rtx retval;

  if (mode == VOIDmode)
    return NULL_RTX;

  arg_size = FUNCTION_ARG_SIZE (mode, type);

  /* If this arg would be passed partially or totally on the stack, then
     this routine should return zero.  pa_arg_partial_bytes will
     handle arguments which are split between regs and stack slots if
     the ABI mandates split arguments.  */
  if (!TARGET_64BIT)
    {
      /* The 32-bit ABI does not split arguments.  */
      if (cum->words + arg_size > max_arg_words)
        return NULL_RTX;
    }
  else
    {
      if (arg_size > 1)
        alignment = cum->words & 1;
      if (cum->words + alignment >= max_arg_words)
        return NULL_RTX;
    }

  /* The 32-bit ABIs and the 64-bit ABIs are rather different,
     particularly in their handling of FP registers.  We might
     be able to cleverly share code between them, but I'm not
     going to bother in the hope that splitting them up results
     in code that is more easily understood.  */

  if (TARGET_64BIT)
    {
      /* Advance the base registers to their current locations.

         Remember, gprs grow towards smaller register numbers while
         fprs grow to higher register numbers.  Also remember that
         although FP regs are 32-bit addressable, we pretend that
         the registers are 64-bits wide.  */
      gpr_reg_base = 26 - cum->words;
      fpr_reg_base = 32 + cum->words;

      /* Arguments wider than one word and small aggregates need special
         treatment.  */
      if (arg_size > 1
          || mode == BLKmode
          || (type && (AGGREGATE_TYPE_P (type)
                       || TREE_CODE (type) == COMPLEX_TYPE
                       || TREE_CODE (type) == VECTOR_TYPE)))
        {
          /* Double-extended precision (80-bit), quad-precision (128-bit)
             and aggregates including complex numbers are aligned on
             128-bit boundaries.  The first eight 64-bit argument slots
             are associated one-to-one with general registers r26
             through r19, and also with floating-point registers fr4
             through fr11.  Arguments larger than one word are always
             passed in general registers.

             Using a PARALLEL with a word mode register results in left
             justified data on a big-endian target.  */
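          /* E.g. (illustrative): a 12-byte struct starting in slot 0
             occupies two slots and is passed as a PARALLEL of
             (reg:DI 26) at offset 0 and (reg:DI 25) at offset 8.  */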
 
          rtx loc[8];
          int i, offset = 0, ub = arg_size;

          /* Align the base register.  */
          gpr_reg_base -= alignment;

          ub = MIN (ub, max_arg_words - cum->words - alignment);
          for (i = 0; i < ub; i++)
            {
              loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
                                          gen_rtx_REG (DImode, gpr_reg_base),
                                          GEN_INT (offset));
              gpr_reg_base -= 1;
              offset += 8;
            }

          return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
        }
    }
  else
    {
      /* If the argument is larger than a word, then we know precisely
         which registers we must use.  */
      if (arg_size > 1)
        {
          if (cum->words)
            {
              gpr_reg_base = 23;
              fpr_reg_base = 38;
            }
          else
            {
              gpr_reg_base = 25;
              fpr_reg_base = 34;
            }

          /* Structures 5 to 8 bytes in size are passed in the general
             registers in the same manner as other non floating-point
             objects.  The data is right-justified and zero-extended
             to 64 bits.  This is opposite to the normal justification
             used on big endian targets and requires special treatment.
             We now define BLOCK_REG_PADDING to pad these objects.
             Aggregates, complex and vector types are passed in the same
             manner as structures.  */
          if (mode == BLKmode
              || (type && (AGGREGATE_TYPE_P (type)
                           || TREE_CODE (type) == COMPLEX_TYPE
                           || TREE_CODE (type) == VECTOR_TYPE)))
            {
              rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
                                           gen_rtx_REG (DImode, gpr_reg_base),
                                           const0_rtx);
              return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
            }
        }
      else
        {
          /* We have a single word (32 bits).  A simple computation
             will get us the register #s we need.  */
          gpr_reg_base = 26 - cum->words;
          fpr_reg_base = 32 + 2 * cum->words;
        }
    }

  /* Determine if the argument needs to be passed in both general and
     floating point registers.  */
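  /* E.g. (illustrative): a double passed to an unprototyped function
     is made available in both a floating-point register and general
     registers, since the callee may look for it in either place.  */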
  if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
       /* If we are doing soft-float with portable runtime, then there
          is no need to worry about FP regs.  */
       && !TARGET_SOFT_FLOAT
       /* The parameter must be some kind of scalar float, else we just
          pass it in integer registers.  */
       && GET_MODE_CLASS (mode) == MODE_FLOAT
       /* The target function must not have a prototype.  */
       && cum->nargs_prototype <= 0
       /* libcalls do not need to pass items in both FP and general
          registers.  */
       && type != NULL_TREE
       /* All this hair applies to "outgoing" args only.  This includes
          sibcall arguments setup with FUNCTION_INCOMING_ARG.  */
       && !cum->incoming)
      /* Also pass outgoing floating arguments in both registers in indirect
         calls with the 32-bit ABI and the HP assembler since there is no
         way to specify the argument locations in static functions.  */
      || (!TARGET_64BIT
          && !TARGET_GAS
          && !cum->incoming
          && cum->indirect
          && GET_MODE_CLASS (mode) == MODE_FLOAT))
    {
      retval
        = gen_rtx_PARALLEL
            (mode,
             gen_rtvec (2,
                        gen_rtx_EXPR_LIST (VOIDmode,
                                           gen_rtx_REG (mode, fpr_reg_base),
                                           const0_rtx),
                        gen_rtx_EXPR_LIST (VOIDmode,
                                           gen_rtx_REG (mode, gpr_reg_base),
                                           const0_rtx)));
    }
  else
    {
      /* See if we should pass this parameter in a general register.  */
      if (TARGET_SOFT_FLOAT
          /* Indirect calls in the normal 32-bit ABI require all arguments
             to be passed in general registers.  */
          || (!TARGET_PORTABLE_RUNTIME
              && !TARGET_64BIT
              && !TARGET_ELF32
              && cum->indirect)
          /* If the parameter is not a scalar floating-point parameter,
             then it belongs in GPRs.  */
          || GET_MODE_CLASS (mode) != MODE_FLOAT
          /* Structure with single SFmode field belongs in GPR.  */
          || (type && AGGREGATE_TYPE_P (type)))
        retval = gen_rtx_REG (mode, gpr_reg_base);
      else
        retval = gen_rtx_REG (mode, fpr_reg_base);
    }
  return retval;
}

/* If this arg would be passed totally in registers or totally on the stack,
   then this routine should return zero.  */
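
/* E.g. (illustrative): on TARGET_64BIT, a three-word argument arriving
   with cum->words == 5 is aligned to slot 6, takes the last two
   register slots plus one stack word, and this routine returns 16,
   the number of bytes passed in registers.  */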
 
static int
pa_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
                      tree type, bool named ATTRIBUTE_UNUSED)
{
  unsigned int max_arg_words = 8;
  unsigned int offset = 0;

  if (!TARGET_64BIT)
    return 0;

  if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
    offset = 1;

  if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
    /* Arg fits fully into registers.  */
    return 0;
  else if (cum->words + offset >= max_arg_words)
    /* Arg fully on the stack.  */
    return 0;
  else
    /* Arg is split.  */
    return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
}
 
/* A get_unnamed_section callback for switching to the text section.

   This function is only used with SOM.  Because we don't support
   named subspaces, we can only create a new subspace or switch back
   to the default text subspace.  */

static void
som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
{
  gcc_assert (TARGET_SOM);
  if (TARGET_GAS)
    {
      if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
        {
          /* We only want to emit a .nsubspa directive once at the
             start of the function.  */
          cfun->machine->in_nsubspa = 1;

          /* Create a new subspace for the text.  This provides
             better stub placement and one-only functions.  */
          if (cfun->decl
              && DECL_ONE_ONLY (cfun->decl)
              && !DECL_WEAK (cfun->decl))
            {
              output_section_asm_op ("\t.SPACE $TEXT$\n"
                                     "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
                                     "ACCESS=44,SORT=24,COMDAT");
              return;
            }
        }
      else
        {
          /* There isn't a current function or the body of the current
             function has been completed.  So, we are changing to the
             text section to output debugging information.  Thus, we
             need to forget that we are in the text section so that
             varasm.c will call us when text_section is selected again.  */
          gcc_assert (!cfun || !cfun->machine
                      || cfun->machine->in_nsubspa == 2);
          in_section = NULL;
        }
      output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
      return;
    }
  output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
}
 
/* A get_unnamed_section callback for switching to comdat data
   sections.  This function is only used with SOM.  */

static void
som_output_comdat_data_section_asm_op (const void *data)
{
  in_section = NULL;
  output_section_asm_op (data);
}

/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
pa_som_asm_init_sections (void)
{
  text_section
    = get_unnamed_section (0, som_output_text_section_asm_op, NULL);

  /* SOM puts readonly data in the default $LIT$ subspace when PIC code
     is not being generated.  */
  som_readonly_data_section
    = get_unnamed_section (0, output_section_asm_op,
                           "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");

  /* When secondary definitions are not supported, SOM makes readonly
     data one-only by creating a new $LIT$ subspace in $TEXT$ with
     the comdat flag.  */
  som_one_only_readonly_data_section
    = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
                           "\t.SPACE $TEXT$\n"
                           "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
                           "ACCESS=0x2c,SORT=16,COMDAT");

  /* When secondary definitions are not supported, SOM makes data one-only
     by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag.  */
  som_one_only_data_section
    = get_unnamed_section (SECTION_WRITE,
                           som_output_comdat_data_section_asm_op,
                           "\t.SPACE $PRIVATE$\n"
                           "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
                           "ACCESS=31,SORT=24,COMDAT");

  /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
     which reference data within the $TEXT$ space (for example constant
     strings in the $LIT$ subspace).

     The assemblers (GAS and HP as) both have problems handling
     the difference of two symbols, which is the other correct way to
     reference constant data during PIC code generation.

     So, there's no way to reference constant data which is in the
     $TEXT$ space during PIC generation.  Instead place all constant
     data into the $PRIVATE$ subspace (this reduces sharing, but it
     works correctly).  */
  readonly_data_section = flag_pic ? data_section : som_readonly_data_section;

  /* We must not have a reference to an external symbol defined in a
     shared library in a readonly section, else the SOM linker will
     complain.

     So, we force exception information into the data section.  */
  exception_section = data_section;
}

/* On hpux10, the linker will give an error if we have a reference
   in the read-only data section to a symbol defined in a shared
   library.  Therefore, expressions that might require a reloc cannot
   be placed in the read-only data section.  */
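
/* E.g. (illustrative): a read-only variable initialized with the
   address of an external symbol needs a reloc, so it fails the !reloc
   tests below and is placed in the writable data section.  */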
 
static section *
pa_select_section (tree exp, int reloc,
                   unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (exp) == VAR_DECL
      && TREE_READONLY (exp)
      && !TREE_THIS_VOLATILE (exp)
      && DECL_INITIAL (exp)
      && (DECL_INITIAL (exp) == error_mark_node
          || TREE_CONSTANT (DECL_INITIAL (exp)))
      && !reloc)
    {
      if (TARGET_SOM
          && DECL_ONE_ONLY (exp)
          && !DECL_WEAK (exp))
        return som_one_only_readonly_data_section;
      else
        return readonly_data_section;
    }
  else if (CONSTANT_CLASS_P (exp) && !reloc)
    return readonly_data_section;
  else if (TARGET_SOM
           && TREE_CODE (exp) == VAR_DECL
           && DECL_ONE_ONLY (exp)
           && !DECL_WEAK (exp))
    return som_one_only_data_section;
  else
    return data_section;
}
 
static void
pa_globalize_label (FILE *stream, const char *name)
{
  /* We only handle DATA objects here; functions are globalized in
     ASM_DECLARE_FUNCTION_NAME.  */
  if (! FUNCTION_NAME_P (name))
    {
      fputs ("\t.EXPORT ", stream);
      assemble_name (stream, name);
      fputs (",DATA\n", stream);
    }
}

/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
                     int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
}

/* Worker function for TARGET_RETURN_IN_MEMORY.  */

bool
pa_return_in_memory (tree type, tree fntype ATTRIBUTE_UNUSED)
{
  /* SOM ABI says that objects larger than 64 bits are returned in memory.
     PA64 ABI says that objects larger than 128 bits are returned in memory.
     Note, int_size_in_bytes can return -1 if the size of the object is
     variable or larger than the maximum value that can be expressed as
     a HOST_WIDE_INT.  It can also return zero for an empty type.  The
     simplest way to handle variable and empty types is to pass them in
     memory.  This avoids problems in defining the boundaries of argument
     slots, allocating registers, etc.  */
  return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
          || int_size_in_bytes (type) <= 0);
}

/* Structure to hold declaration and name of external symbols that are
   emitted by GCC.  We generate a vector of these symbols and output them
   at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
   This avoids putting out names that are never really used.  */

typedef struct extern_symbol GTY(())
{
  tree decl;
  const char *name;
} extern_symbol;

/* Define gc'd vector type for extern_symbol.  */
DEF_VEC_O(extern_symbol);
DEF_VEC_ALLOC_O(extern_symbol,gc);

/* Vector of extern_symbol pointers.  */
static GTY(()) VEC(extern_symbol,gc) *extern_symbols;

#ifdef ASM_OUTPUT_EXTERNAL_REAL
/* Mark DECL (name NAME) as an external reference (assembler output
   file FILE).  This saves the names to output at the end of the file
   if actually referenced.  */

void
pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
{
  extern_symbol *p = VEC_safe_push (extern_symbol, gc, extern_symbols, NULL);

  gcc_assert (file == asm_out_file);
  p->decl = decl;
  p->name = name;
}

/* Output text required at the end of an assembler file.
   This includes deferred plabels and .import directives for
   all external symbols that were actually referenced.  */
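
/* E.g. (illustrative): "extern int bar;" produces an .IMPORT-style
   directive (via ASM_OUTPUT_EXTERNAL_REAL) only if bar's SYMBOL_REF
   was actually referenced by the emitted code.  */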
 
static void
pa_hpux_file_end (void)
{
  unsigned int i;
  extern_symbol *p;

  if (!NO_DEFERRED_PROFILE_COUNTERS)
    output_deferred_profile_counters ();

  output_deferred_plabels ();

  for (i = 0; VEC_iterate (extern_symbol, extern_symbols, i, p); i++)
    {
      tree decl = p->decl;

      if (!TREE_ASM_WRITTEN (decl)
          && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
        ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
    }

  VEC_free (extern_symbol, gc, extern_symbols);
}
#endif

#include "gt-pa.h"
