URL
                    https://opencores.org/ocsvn/openrisc/openrisc/trunk
                
            Subversion Repositories openrisc
[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [gcc/] [config/] [mips/] [10000.md] - Rev 852
Go to most recent revision | Compare with Previous | Blame | View Log
;; DFA-based pipeline description for the VR1x000.
;;   Copyright (C) 2005, 2006, 2008 Free Software Foundation, Inc.
;;
;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.

;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
;; License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.


;; R12K/R14K/R16K are derivatives of R10K, thus copy its description
;; until specific tuning for each is added.

;; R10000 has an int queue, fp queue, address queue.
;; The int queue feeds ALU1 and ALU2.
;; The fp queue feeds the fp-adder and fp-multiplier.
;; The addr queue feeds the Load/Store unit.
;;
;; However, we define the fp-adder and fp-multiplier as
;; separate automatons, because the fp-multiplier is
;; divided into fp-multiplier, fp-division, and
;; fp-squareroot units, all of which share the same
;; issue and completion logic, yet can operate in
;; parallel.
;;
;; This is based on the model described in the R10K Manual
;; and it helps to reduce the size of the automata.
(define_automaton "r10k_a_int, r10k_a_fpadder, r10k_a_addr,r10k_a_fpmpy, r10k_a_fpdiv, r10k_a_fpsqrt")

;; One CPU unit per functional unit, each bound to its own automaton
;; (the two integer ALUs share r10k_a_int).
(define_cpu_unit "r10k_alu1" "r10k_a_int")
(define_cpu_unit "r10k_alu2" "r10k_a_int")
(define_cpu_unit "r10k_fpadd" "r10k_a_fpadder")
(define_cpu_unit "r10k_fpmpy" "r10k_a_fpmpy")
(define_cpu_unit "r10k_fpdiv" "r10k_a_fpdiv")
(define_cpu_unit "r10k_fpsqrt" "r10k_a_fpsqrt")
(define_cpu_unit "r10k_loadstore" "r10k_a_addr")


;; R10k Loads and Stores.
(define_insn_reservation "r10k_load" 2
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "load,prefetch,prefetchx"))
  "r10k_loadstore")

(define_insn_reservation "r10k_store" 0
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "store,fpstore,fpidxstore"))
  "r10k_loadstore")

(define_insn_reservation "r10k_fpload" 3
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "fpload,fpidxload"))
  "r10k_loadstore")


;; Integer add/sub + logic ops, and mt hi/lo can be done by alu1 or alu2.
;; Miscellaneous arith goes here too (this is a guess).
(define_insn_reservation "r10k_arith" 1
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "arith,mthilo,slt,clz,const,nop,trap,logical"))
  "r10k_alu1 | r10k_alu2")

;; We treat mfhilo differently, because we need to know when
;; it's HI and when it's LO.
(define_insn_reservation "r10k_mfhi" 1
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "mfhilo")
            (not (match_operand 1 "lo_operand"))))
  "r10k_alu1 | r10k_alu2")

(define_insn_reservation "r10k_mflo" 1
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "mfhilo")
            (match_operand 1 "lo_operand")))
  "r10k_alu1 | r10k_alu2")


;; ALU1 handles shifts, branch eval, and condmove.
;;
;; Brancher is separate, but part of ALU1, but can only
;; do one branch per cycle (is this even implementable?).
;;
;; Unsure if the brancher handles jumps and calls as well, but since
;; they're related, we'll add them here for now.
(define_insn_reservation "r10k_brancher" 1
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "shift,branch,jump,call"))
  "r10k_alu1")

(define_insn_reservation "r10k_int_cmove" 1
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "condmove")
            (eq_attr "mode" "SI,DI")))
  "r10k_alu1")


;; Coprocessor Moves.
;; mtc1/dmtc1 are handled by ALU1.
;; mfc1/dmfc1 are handled by the fp-multiplier.
(define_insn_reservation "r10k_mt_xfer" 3
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "mtc"))
  "r10k_alu1")

(define_insn_reservation "r10k_mf_xfer" 2
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "mfc"))
  "r10k_fpmpy")


;; Only ALU2 does int multiplications and divisions.
;;
;; According to the Vr10000 series user manual,
;; integer mult and div insns can be issued one
;; cycle earlier if using register Lo.  We model
;; this by using the Lo value by default, as it
;; is the more common value, and use a bypass
;; for the Hi value when needed.
;;
;; Also of note, There are different latencies
;; for MULT/DMULT (Lo 5/Hi 6) and MULTU/DMULTU (Lo 6/Hi 7).
;; However, gcc does not have separate types
;; for these insns.  Thus to strike a balance,
;; we use the Hi latency value for imul
;; operations until the imul type can be split.
(define_insn_reservation "r10k_imul_single" 6
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "imul,imul3")
            (eq_attr "mode" "SI")))
  "r10k_alu2 * 6")

(define_insn_reservation "r10k_imul_double" 10
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "imul,imul3")
            (eq_attr "mode" "DI")))
  "r10k_alu2 * 10")

;; Divides keep ALU2 busy.
(define_insn_reservation "r10k_idiv_single" 34
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "idiv")
            (eq_attr "mode" "SI")))
  "r10k_alu2 * 35")

(define_insn_reservation "r10k_idiv_double" 66
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "idiv")
            (eq_attr "mode" "DI")))
  "r10k_alu2 * 67")

;; A divide result consumed by a move-from-HI takes one cycle more
;; than the default latency declared above.
(define_bypass 35 "r10k_idiv_single" "r10k_mfhi")
(define_bypass 67 "r10k_idiv_double" "r10k_mfhi")


;; Floating point add/sub, mul, abs value, neg, comp, & moves.
(define_insn_reservation "r10k_fp_miscadd" 2
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "fadd,fabs,fneg,fcmp"))
  "r10k_fpadd")

(define_insn_reservation "r10k_fp_miscmul" 2
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "fmul,fmove"))
  "r10k_fpmpy")

(define_insn_reservation "r10k_fp_cmove" 2
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "condmove")
            (eq_attr "mode" "SF,DF")))
  "r10k_fpmpy")


;; The fcvt.s.[wl] insn has latency 4, repeat 2.
;; All other fcvt insns have latency 2, repeat 1.
(define_insn_reservation "r10k_fcvt_single" 4
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "fcvt")
            (eq_attr "cnv_mode" "I2S")))
  "r10k_fpadd * 2")

(define_insn_reservation "r10k_fcvt_other" 2
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "fcvt")
            (eq_attr "cnv_mode" "!I2S")))
  "r10k_fpadd")


;; Run the fmadd insn through fp-adder first, then fp-multiplier.
;;
;; The latency for fmadd is 2 cycles if the result is used
;; by another fmadd instruction.
(define_insn_reservation "r10k_fmadd" 4
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "fmadd"))
  "r10k_fpadd, r10k_fpmpy")

(define_bypass 2 "r10k_fmadd" "r10k_fmadd")


;; Floating point Divisions & square roots.
(define_insn_reservation "r10k_fdiv_single" 12
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "fdiv,frdiv")
            (eq_attr "mode" "SF")))
  "r10k_fpdiv * 14")

(define_insn_reservation "r10k_fdiv_double" 19
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "fdiv,frdiv")
            (eq_attr "mode" "DF")))
  "r10k_fpdiv * 21")

(define_insn_reservation "r10k_fsqrt_single" 18
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "fsqrt")
            (eq_attr "mode" "SF")))
  "r10k_fpsqrt * 20")

(define_insn_reservation "r10k_fsqrt_double" 33
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "fsqrt")
            (eq_attr "mode" "DF")))
  "r10k_fpsqrt * 35")

(define_insn_reservation "r10k_frsqrt_single" 30
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "frsqrt")
            (eq_attr "mode" "SF")))
  "r10k_fpsqrt * 20")

(define_insn_reservation "r10k_frsqrt_double" 52
  (and (eq_attr "cpu" "r10000")
       (and (eq_attr "type" "frsqrt")
            (eq_attr "mode" "DF")))
  "r10k_fpsqrt * 35")


;; Handle unknown/multi insns here (this is a guess).
;; Both integer ALUs are reserved in the same cycle.
(define_insn_reservation "r10k_unknown" 1
  (and (eq_attr "cpu" "r10000")
       (eq_attr "type" "unknown,multi"))
  "r10k_alu1 + r10k_alu2")
Go to most recent revision | Compare with Previous | Blame | View Log
