OpenCores

;;

;;

;; Pipeline description for the VR4130 family.

;; Pipeline description for the VR4130 family.

;;

;;

;; The processor issues each 8-byte aligned pair of instructions together,

;; The processor issues each 8-byte aligned pair of instructions together,

;; stalling the second instruction if it depends on the first.  Thus, if we

;; stalling the second instruction if it depends on the first.  Thus, if we

;; want two instructions to issue in parallel, we need to make sure that the

;; want two instructions to issue in parallel, we need to make sure that the

;; first one is 8-byte aligned.

;; first one is 8-byte aligned.

;;

;;

;; For the purposes of this pipeline description, we treat the processor

;; For the purposes of this pipeline description, we treat the processor

;; like a standard two-way superscalar architecture.  If scheduling were

;; like a standard two-way superscalar architecture.  If scheduling were

;; the last pass to run, we could use the scheduler hooks to vary the

;; the last pass to run, we could use the scheduler hooks to vary the

;; issue rate depending on whether an instruction is at an aligned or

;; issue rate depending on whether an instruction is at an aligned or

;; unaligned address.  Unfortunately, delayed branch scheduling and

;; unaligned address.  Unfortunately, delayed branch scheduling and

;; hazard avoidance are done after the final scheduling pass, and they

;; hazard avoidance are done after the final scheduling pass, and they

;; can change the addresses of many instructions.

;; can change the addresses of many instructions.

;;

;;

;; We get around this in two ways:

;; We get around this in two ways:

;;

;;

;;   (1) By running an extra pass at the end of compilation.  This pass goes

;;   (1) By running an extra pass at the end of compilation.  This pass goes

;;       through the function looking for pairs of instructions that could

;;       through the function looking for pairs of instructions that could

;;       execute in parallel.  It makes sure that the first instruction in

;;       execute in parallel.  It makes sure that the first instruction in

;;       each pair is suitably aligned, inserting nops if necessary.  Doing

;;       each pair is suitably aligned, inserting nops if necessary.  Doing

;;       this gives the same kind of pipeline behavior we would see on a

;;       this gives the same kind of pipeline behavior we would see on a

;;       normal superscalar target.

;;       normal superscalar target.

;;

;;

;;       This pass is generally a speed improvement, but the extra nops will

;;       This pass is generally a speed improvement, but the extra nops will

;;       obviously make the program bigger.  It is therefore unsuitable for

;;       obviously make the program bigger.  It is therefore unsuitable for

;;       -Os (at the very least).

;;       -Os (at the very least).

;;

;;

;;   (2) By modifying the scheduler hooks so that, where possible:

;;   (2) By modifying the scheduler hooks so that, where possible:

;;

;;

;;       (a) dependent instructions are separated by a non-dependent

;;       (a) dependent instructions are separated by a non-dependent

;;           instruction;

;;           instruction;

;;

;;

;;       (b) instructions that use the multiplication unit are separated

;;       (b) instructions that use the multiplication unit are separated

;;           by non-multiplication instructions; and

;;           by non-multiplication instructions; and

;;

;;

;;       (c) memory access instructions are separated by non-memory

;;       (c) memory access instructions are separated by non-memory

;;           instructions.

;;           instructions.

;;

;;

;;       The idea is to keep conflicting instructions apart wherever possible

;;       The idea is to keep conflicting instructions apart wherever possible

;;       and thus make the schedule less dependent on alignment.

;;       and thus make the schedule less dependent on alignment.

(define_automaton "vr4130_main, vr4130_muldiv, vr4130_mulpre")

(define_automaton "vr4130_main, vr4130_muldiv, vr4130_mulpre")

(define_cpu_unit "vr4130_alu1, vr4130_alu2, vr4130_dcache" "vr4130_main")

(define_cpu_unit "vr4130_alu1, vr4130_alu2, vr4130_dcache" "vr4130_main")

(define_cpu_unit "vr4130_muldiv" "vr4130_muldiv")

(define_cpu_unit "vr4130_muldiv" "vr4130_muldiv")

;; This is a fake unit for pre-reload scheduling of multiplications.

;; This is a fake unit for pre-reload scheduling of multiplications.

;; It enforces the true post-reload repeat rate.

;; It enforces the true post-reload repeat rate.

(define_cpu_unit "vr4130_mulpre" "vr4130_mulpre")

(define_cpu_unit "vr4130_mulpre" "vr4130_mulpre")

;; The scheduling hooks use this attribute for (b) above.

;; The scheduling hooks use this attribute for (b) above.

(define_attr "vr4130_class" "mul,mem,alu"

(define_attr "vr4130_class" "mul,mem,alu"

  (cond [(eq_attr "type" "load,store")

  (cond [(eq_attr "type" "load,store")

         (const_string "mem")

         (const_string "mem")

         (eq_attr "type" "mfhilo,mthilo,imul,imul3,imadd,idiv")

         (eq_attr "type" "mfhilo,mthilo,imul,imul3,imadd,idiv")

         (const_string "mul")]

         (const_string "mul")]

        (const_string "alu")))

        (const_string "alu")))

(define_insn_reservation "vr4130_multi" 1

(define_insn_reservation "vr4130_multi" 1

  (and (eq_attr "cpu" "r4130")

  (and (eq_attr "cpu" "r4130")

       (eq_attr "type" "multi,unknown"))

       (eq_attr "type" "multi,unknown"))

  "vr4130_alu1 + vr4130_alu2 + vr4130_dcache + vr4130_muldiv")

  "vr4130_alu1 + vr4130_alu2 + vr4130_dcache + vr4130_muldiv")

(define_insn_reservation "vr4130_int" 1

(define_insn_reservation "vr4130_int" 1

  (and (eq_attr "cpu" "r4130")

  (and (eq_attr "cpu" "r4130")

       (eq_attr "type" "const,arith,shift,slt,nop"))

       (eq_attr "type" "const,arith,shift,slt,nop"))

  "vr4130_alu1 | vr4130_alu2")

  "vr4130_alu1 | vr4130_alu2")

(define_insn_reservation "vr4130_load" 3

(define_insn_reservation "vr4130_load" 3

  (and (eq_attr "cpu" "r4130")

  (and (eq_attr "cpu" "r4130")

       (eq_attr "type" "load"))

       (eq_attr "type" "load"))

  "vr4130_dcache")

  "vr4130_dcache")

(define_insn_reservation "vr4130_store" 1

(define_insn_reservation "vr4130_store" 1

  (and (eq_attr "cpu" "r4130")

  (and (eq_attr "cpu" "r4130")

       (eq_attr "type" "store"))

       (eq_attr "type" "store"))

  "vr4130_dcache")

  "vr4130_dcache")

(define_insn_reservation "vr4130_mfhilo" 3

(define_insn_reservation "vr4130_mfhilo" 3

  (and (eq_attr "cpu" "r4130")

  (and (eq_attr "cpu" "r4130")

       (eq_attr "type" "mfhilo"))

       (eq_attr "type" "mfhilo"))

  "vr4130_muldiv")

  "vr4130_muldiv")

(define_insn_reservation "vr4130_mthilo" 1

(define_insn_reservation "vr4130_mthilo" 1

  (and (eq_attr "cpu" "r4130")

  (and (eq_attr "cpu" "r4130")

       (eq_attr "type" "mthilo"))

       (eq_attr "type" "mthilo"))

  "vr4130_muldiv")

  "vr4130_muldiv")

;; The product is available in LO & HI after one cycle.  Moving the result

;; The product is available in LO & HI after one cycle.  Moving the result

;; into an integer register will take an additional three cycles, see mflo

;; into an integer register will take an additional three cycles, see mflo

;; & mfhi above.  Note that the same latencies and repeat rates apply if we

;; & mfhi above.  Note that the same latencies and repeat rates apply if we

;; use "mtlo; macc" instead of "mult; mflo".

;; use "mtlo; macc" instead of "mult; mflo".

(define_insn_reservation "vr4130_mulsi" 4

(define_insn_reservation "vr4130_mulsi" 4

  (and (eq_attr "cpu" "r4130")

  (and (eq_attr "cpu" "r4130")

       (and (eq_attr "type" "imul,imul3")

       (and (eq_attr "type" "imul,imul3")

            (eq_attr "mode" "SI")))

            (eq_attr "mode" "SI")))

  "vr4130_muldiv + (vr4130_mulpre * 2)")

  "vr4130_muldiv + (vr4130_mulpre * 2)")

;; As for vr4130_mulsi, but the product is available in LO and HI

;; As for vr4130_mulsi, but the product is available in LO and HI

;; after 3 cycles.

;; after 3 cycles.

(define_insn_reservation "vr4130_muldi" 6

(define_insn_reservation "vr4130_muldi" 6

  (and (eq_attr "cpu" "r4130")

  (and (eq_attr "cpu" "r4130")

       (and (eq_attr "type" "imul,imul3")

       (and (eq_attr "type" "imul,imul3")

            (eq_attr "mode" "DI")))

            (eq_attr "mode" "DI")))

  "(vr4130_muldiv * 3) + (vr4130_mulpre * 4)")

  "(vr4130_muldiv * 3) + (vr4130_mulpre * 4)")

;; maccs can execute in consecutive cycles without stalling, but it

;; maccs can execute in consecutive cycles without stalling, but it

;; is 3 cycles before the integer destination can be read.

;; is 3 cycles before the integer destination can be read.

(define_insn_reservation "vr4130_macc" 3

(define_insn_reservation "vr4130_macc" 3

  (and (eq_attr "cpu" "r4130")

  (and (eq_attr "cpu" "r4130")

       (eq_attr "type" "imadd"))

       (eq_attr "type" "imadd"))

  "vr4130_muldiv")

  "vr4130_muldiv")

(define_bypass 1 "vr4130_mulsi,vr4130_macc" "vr4130_macc" "mips_linked_madd_p")

(define_bypass 1 "vr4130_mulsi,vr4130_macc" "vr4130_macc" "mips_linked_madd_p")

(define_bypass 1 "vr4130_mulsi,vr4130_macc" "vr4130_mfhilo")

(define_bypass 1 "vr4130_mulsi,vr4130_macc" "vr4130_mfhilo")

(define_bypass 3 "vr4130_muldi" "vr4130_mfhilo")

(define_bypass 3 "vr4130_muldi" "vr4130_mfhilo")

(define_insn_reservation "vr4130_divsi" 36

(define_insn_reservation "vr4130_divsi" 36

  (and (eq_attr "cpu" "r4130")

  (and (eq_attr "cpu" "r4130")

       (and (eq_attr "type" "idiv")

       (and (eq_attr "type" "idiv")

            (eq_attr "mode" "SI")))

            (eq_attr "mode" "SI")))

  "vr4130_muldiv * 36")

  "vr4130_muldiv * 36")

(define_insn_reservation "vr4130_divdi" 72

(define_insn_reservation "vr4130_divdi" 72

  (and (eq_attr "cpu" "r4130")

  (and (eq_attr "cpu" "r4130")

       (and (eq_attr "type" "idiv")

       (and (eq_attr "type" "idiv")

            (eq_attr "mode" "DI")))

            (eq_attr "mode" "DI")))

  "vr4130_muldiv * 72")

  "vr4130_muldiv * 72")

(define_insn_reservation "vr4130_branch" 0

(define_insn_reservation "vr4130_branch" 0

  (and (eq_attr "cpu" "r4130")

  (and (eq_attr "cpu" "r4130")

       (eq_attr "type" "branch,jump,call"))

       (eq_attr "type" "branch,jump,call"))

  "vr4130_alu1 | vr4130_alu2")

  "vr4130_alu1 | vr4130_alu2")

Browse

Tools

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-old/] [gcc-4.2.2/] [gcc/] [config/] [mips/] [4130.md] - Diff between revs 154 and 816

Rev 154	Rev 816
`;;`	`;;`
`;; Pipeline description for the VR4130 family.`	`;; Pipeline description for the VR4130 family.`
`;;`	`;;`
`;; The processor issues each 8-byte aligned pair of instructions together,`	`;; The processor issues each 8-byte aligned pair of instructions together,`
`;; stalling the second instruction if it depends on the first. Thus, if we`	`;; stalling the second instruction if it depends on the first. Thus, if we`
`;; want two instructions to issue in parallel, we need to make sure that the`	`;; want two instructions to issue in parallel, we need to make sure that the`
`;; first one is 8-byte aligned.`	`;; first one is 8-byte aligned.`
`;;`	`;;`
`;; For the purposes of this pipeline description, we treat the processor`	`;; For the purposes of this pipeline description, we treat the processor`
`;; like a standard two-way superscalar architecture. If scheduling were`	`;; like a standard two-way superscalar architecture. If scheduling were`
`;; the last pass to run, we could use the scheduler hooks to vary the`	`;; the last pass to run, we could use the scheduler hooks to vary the`
`;; issue rate depending on whether an instruction is at an aligned or`	`;; issue rate depending on whether an instruction is at an aligned or`
`;; unaligned address. Unfortunately, delayed branch scheduling and`	`;; unaligned address. Unfortunately, delayed branch scheduling and`
`;; hazard avoidance are done after the final scheduling pass, and they`	`;; hazard avoidance are done after the final scheduling pass, and they`
`;; can change the addresses of many instructions.`	`;; can change the addresses of many instructions.`
`;;`	`;;`
`;; We get around this in two ways:`	`;; We get around this in two ways:`
`;;`	`;;`
`;; (1) By running an extra pass at the end of compilation. This pass goes`	`;; (1) By running an extra pass at the end of compilation. This pass goes`
`;; through the function looking for pairs of instructions that could`	`;; through the function looking for pairs of instructions that could`
`;; execute in parallel. It makes sure that the first instruction in`	`;; execute in parallel. It makes sure that the first instruction in`
`;; each pair is suitably aligned, inserting nops if necessary. Doing`	`;; each pair is suitably aligned, inserting nops if necessary. Doing`
`;; this gives the same kind of pipeline behavior we would see on a`	`;; this gives the same kind of pipeline behavior we would see on a`
`;; normal superscalar target.`	`;; normal superscalar target.`
`;;`	`;;`
`;; This pass is generally a speed improvement, but the extra nops will`	`;; This pass is generally a speed improvement, but the extra nops will`
`;; obviously make the program bigger. It is therefore unsuitable for`	`;; obviously make the program bigger. It is therefore unsuitable for`
`;; -Os (at the very least).`	`;; -Os (at the very least).`
`;;`	`;;`
`;; (2) By modifying the scheduler hooks so that, where possible:`	`;; (2) By modifying the scheduler hooks so that, where possible:`
`;;`	`;;`
`;; (a) dependent instructions are separated by a non-dependent`	`;; (a) dependent instructions are separated by a non-dependent`
`;; instruction;`	`;; instruction;`
`;;`	`;;`
`;; (b) instructions that use the multiplication unit are separated`	`;; (b) instructions that use the multiplication unit are separated`
`;; by non-multiplication instructions; and`	`;; by non-multiplication instructions; and`
`;;`	`;;`
`;; (c) memory access instructions are separated by non-memory`	`;; (c) memory access instructions are separated by non-memory`
`;; instructions.`	`;; instructions.`
`;;`	`;;`
`;; The idea is to keep conflicting instructions apart wherever possible`	`;; The idea is to keep conflicting instructions apart wherever possible`
`;; and thus make the schedule less dependent on alignment.`	`;; and thus make the schedule less dependent on alignment.`

`(define_automaton "vr4130_main, vr4130_muldiv, vr4130_mulpre")`	`(define_automaton "vr4130_main, vr4130_muldiv, vr4130_mulpre")`

`(define_cpu_unit "vr4130_alu1, vr4130_alu2, vr4130_dcache" "vr4130_main")`	`(define_cpu_unit "vr4130_alu1, vr4130_alu2, vr4130_dcache" "vr4130_main")`
`(define_cpu_unit "vr4130_muldiv" "vr4130_muldiv")`	`(define_cpu_unit "vr4130_muldiv" "vr4130_muldiv")`

`;; This is a fake unit for pre-reload scheduling of multiplications.`	`;; This is a fake unit for pre-reload scheduling of multiplications.`
`;; It enforces the true post-reload repeat rate.`	`;; It enforces the true post-reload repeat rate.`
`(define_cpu_unit "vr4130_mulpre" "vr4130_mulpre")`	`(define_cpu_unit "vr4130_mulpre" "vr4130_mulpre")`

`;; The scheduling hooks use this attribute for (b) above.`	`;; The scheduling hooks use this attribute for (b) above.`
`(define_attr "vr4130_class" "mul,mem,alu"`	`(define_attr "vr4130_class" "mul,mem,alu"`
`(cond [(eq_attr "type" "load,store")`	`(cond [(eq_attr "type" "load,store")`
`(const_string "mem")`	`(const_string "mem")`

`(eq_attr "type" "mfhilo,mthilo,imul,imul3,imadd,idiv")`	`(eq_attr "type" "mfhilo,mthilo,imul,imul3,imadd,idiv")`
`(const_string "mul")]`	`(const_string "mul")]`
`(const_string "alu")))`	`(const_string "alu")))`

`(define_insn_reservation "vr4130_multi" 1`	`(define_insn_reservation "vr4130_multi" 1`
`(and (eq_attr "cpu" "r4130")`	`(and (eq_attr "cpu" "r4130")`
`(eq_attr "type" "multi,unknown"))`	`(eq_attr "type" "multi,unknown"))`
`"vr4130_alu1 + vr4130_alu2 + vr4130_dcache + vr4130_muldiv")`	`"vr4130_alu1 + vr4130_alu2 + vr4130_dcache + vr4130_muldiv")`

`(define_insn_reservation "vr4130_int" 1`	`(define_insn_reservation "vr4130_int" 1`
`(and (eq_attr "cpu" "r4130")`	`(and (eq_attr "cpu" "r4130")`
`(eq_attr "type" "const,arith,shift,slt,nop"))`	`(eq_attr "type" "const,arith,shift,slt,nop"))`
`"vr4130_alu1 \| vr4130_alu2")`	`"vr4130_alu1 \| vr4130_alu2")`

`(define_insn_reservation "vr4130_load" 3`	`(define_insn_reservation "vr4130_load" 3`
`(and (eq_attr "cpu" "r4130")`	`(and (eq_attr "cpu" "r4130")`
`(eq_attr "type" "load"))`	`(eq_attr "type" "load"))`
`"vr4130_dcache")`	`"vr4130_dcache")`

`(define_insn_reservation "vr4130_store" 1`	`(define_insn_reservation "vr4130_store" 1`
`(and (eq_attr "cpu" "r4130")`	`(and (eq_attr "cpu" "r4130")`
`(eq_attr "type" "store"))`	`(eq_attr "type" "store"))`
`"vr4130_dcache")`	`"vr4130_dcache")`

`(define_insn_reservation "vr4130_mfhilo" 3`	`(define_insn_reservation "vr4130_mfhilo" 3`
`(and (eq_attr "cpu" "r4130")`	`(and (eq_attr "cpu" "r4130")`
`(eq_attr "type" "mfhilo"))`	`(eq_attr "type" "mfhilo"))`
`"vr4130_muldiv")`	`"vr4130_muldiv")`

`(define_insn_reservation "vr4130_mthilo" 1`	`(define_insn_reservation "vr4130_mthilo" 1`
`(and (eq_attr "cpu" "r4130")`	`(and (eq_attr "cpu" "r4130")`
`(eq_attr "type" "mthilo"))`	`(eq_attr "type" "mthilo"))`
`"vr4130_muldiv")`	`"vr4130_muldiv")`

`;; The product is available in LO & HI after one cycle. Moving the result`	`;; The product is available in LO & HI after one cycle. Moving the result`
`;; into an integer register will take an additional three cycles, see mflo`	`;; into an integer register will take an additional three cycles, see mflo`
`;; & mfhi above. Note that the same latencies and repeat rates apply if we`	`;; & mfhi above. Note that the same latencies and repeat rates apply if we`
`;; use "mtlo; macc" instead of "mult; mflo".`	`;; use "mtlo; macc" instead of "mult; mflo".`
`(define_insn_reservation "vr4130_mulsi" 4`	`(define_insn_reservation "vr4130_mulsi" 4`
`(and (eq_attr "cpu" "r4130")`	`(and (eq_attr "cpu" "r4130")`
`(and (eq_attr "type" "imul,imul3")`	`(and (eq_attr "type" "imul,imul3")`
`(eq_attr "mode" "SI")))`	`(eq_attr "mode" "SI")))`
`"vr4130_muldiv + (vr4130_mulpre * 2)")`	`"vr4130_muldiv + (vr4130_mulpre * 2)")`

`;; As for vr4130_mulsi, but the product is available in LO and HI`	`;; As for vr4130_mulsi, but the product is available in LO and HI`
`;; after 3 cycles.`	`;; after 3 cycles.`
`(define_insn_reservation "vr4130_muldi" 6`	`(define_insn_reservation "vr4130_muldi" 6`
`(and (eq_attr "cpu" "r4130")`	`(and (eq_attr "cpu" "r4130")`
`(and (eq_attr "type" "imul,imul3")`	`(and (eq_attr "type" "imul,imul3")`
`(eq_attr "mode" "DI")))`	`(eq_attr "mode" "DI")))`
`"(vr4130_muldiv * 3) + (vr4130_mulpre * 4)")`	`"(vr4130_muldiv * 3) + (vr4130_mulpre * 4)")`

`;; maccs can execute in consecutive cycles without stalling, but it`	`;; maccs can execute in consecutive cycles without stalling, but it`
`;; is 3 cycles before the integer destination can be read.`	`;; is 3 cycles before the integer destination can be read.`
`(define_insn_reservation "vr4130_macc" 3`	`(define_insn_reservation "vr4130_macc" 3`
`(and (eq_attr "cpu" "r4130")`	`(and (eq_attr "cpu" "r4130")`
`(eq_attr "type" "imadd"))`	`(eq_attr "type" "imadd"))`
`"vr4130_muldiv")`	`"vr4130_muldiv")`

`(define_bypass 1 "vr4130_mulsi,vr4130_macc" "vr4130_macc" "mips_linked_madd_p")`	`(define_bypass 1 "vr4130_mulsi,vr4130_macc" "vr4130_macc" "mips_linked_madd_p")`
`(define_bypass 1 "vr4130_mulsi,vr4130_macc" "vr4130_mfhilo")`	`(define_bypass 1 "vr4130_mulsi,vr4130_macc" "vr4130_mfhilo")`
`(define_bypass 3 "vr4130_muldi" "vr4130_mfhilo")`	`(define_bypass 3 "vr4130_muldi" "vr4130_mfhilo")`

`(define_insn_reservation "vr4130_divsi" 36`	`(define_insn_reservation "vr4130_divsi" 36`
`(and (eq_attr "cpu" "r4130")`	`(and (eq_attr "cpu" "r4130")`
`(and (eq_attr "type" "idiv")`	`(and (eq_attr "type" "idiv")`
`(eq_attr "mode" "SI")))`	`(eq_attr "mode" "SI")))`
`"vr4130_muldiv * 36")`	`"vr4130_muldiv * 36")`

`(define_insn_reservation "vr4130_divdi" 72`	`(define_insn_reservation "vr4130_divdi" 72`
`(and (eq_attr "cpu" "r4130")`	`(and (eq_attr "cpu" "r4130")`
`(and (eq_attr "type" "idiv")`	`(and (eq_attr "type" "idiv")`
`(eq_attr "mode" "DI")))`	`(eq_attr "mode" "DI")))`
`"vr4130_muldiv * 72")`	`"vr4130_muldiv * 72")`

`(define_insn_reservation "vr4130_branch" 0`	`(define_insn_reservation "vr4130_branch" 0`
`(and (eq_attr "cpu" "r4130")`	`(and (eq_attr "cpu" "r4130")`
`(eq_attr "type" "branch,jump,call"))`	`(eq_attr "type" "branch,jump,call"))`
`"vr4130_alu1 \| vr4130_alu2")`	`"vr4130_alu1 \| vr4130_alu2")`