URL
https://opencores.org/ocsvn/open8_urisc/open8_urisc/trunk
Subversion Repositories open8_urisc
[/] [open8_urisc/] [trunk/] [gnu/] [binutils/] [ld/] [emultempl/] [spu_ovl.S] - Rev 145
Compare with Previous | Blame | View Log
/* Overlay manager for SPU.

   Copyright 2006, 2007, 2008 Free Software Foundation, Inc.

   This file is part of the GNU Binutils.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
   MA 02110-1301, USA.  */

/* Reading notes for this file.

   This source is run through the C preprocessor before assembly: the
   register names used in the instructions below are cpp macros.

   Lines of the form "#nop", "#lnop" and "#nop; lnop" are commented-out
   placeholders, not instructions.  They appear to mark issue slots the
   hand scheduling leaves empty (TODO confirm with the original author).

   Each instruction carries a trailing "# p,l  c" annotation.  These look
   like scheduling notes (presumably pipeline, latency and issue cycle;
   TODO confirm).  They are comments only and do not affect assembly.

   The instruction order is hand scheduled; do not reorder instructions
   without re-checking the register aliasing described below.  */

/* MFC DMA defn's.  */
#define MFC_GET_CMD		0x40
#define MFC_MAX_DMA_SIZE	0x4000
#define MFC_TAG_UPDATE_ALL	2
#define MFC_TAG_ID		0

/* Register usage.

   Every symbolic name below is an alias for one of nine hardware
   registers, $71 to $79.  Several aliases share the same register, so
   two names from the same group are never live at the same time.

   reserved1 to reserved5 ($75 to $79) are written without being saved.
   save1 to save3 ($74 to $72) are stored at -16, -32 and -48 from $sp
   before they are overwritten and reloaded before the final branch.
   save4 ($71) is stored at -64 from $sp and is only used when
   OVLY_IRQ_SAVE is defined.  */
#define reserved1	$75
#define parm		$75
#define tab1		reserved1
#define tab2		reserved1
#define vma		reserved1
#define oldvma		reserved1
#define newmask		reserved1
#define map		reserved1

#define reserved2	$76
#define off1		reserved2
#define off2		reserved2
#define present1	reserved2
#define present2	reserved2
#define sz		reserved2
#define cmp		reserved2
#define add64		reserved2
#define cgbits		reserved2
#define off3		reserved2
#define off4		reserved2
#define addr4		reserved2
#define off5		reserved2
#define tagstat		reserved2

#define reserved3	$77
#define size1		reserved3
#define size2		reserved3
#define rv3		reserved3
#define ealo		reserved3
#define cmd		reserved3
#define off64		reserved3
#define tab3		reserved3
#define tab4		reserved3
#define tab5		reserved3

#define reserved4	$78
#define ovl		reserved4
#define rv2		reserved4
#define rv5		reserved4
#define cgshuf		reserved4
#define newovl		reserved4
#define irqtmp1		reserved4
#define irqtmp2		reserved4

#define reserved5	$79
#define target		reserved5

#define save1		$74
#define rv4		save1
#define rv7		save1
#define tagid		save1
#define maxsize		save1
#define pbyte		save1
#define pbit		save1

#define save2		$73
#define cur		save2
#define rv6		save2
#define osize		save2
#define zovl		save2
#define oldovl		save2
#define newvma		save2

#define save3		$72
#define rv1		save3
#define ea64		save3
#define buf3		save3
#define genwi		save3
#define newmap		save3
#define oldmask		save3

#define save4		$71
#define irq_stat	save4

	.text

/* Shuffle control used by __ovly_load (shufb rv4, rv1, cur, rv3) when
   building the replacement $lr value.  */
	.align	4
	.type	__rv_pattern, @object
	.size	__rv_pattern, 16
__rv_pattern:
	.word	0x00010203, 0x10111213, 0x80808080, 0x80808080

/* Shuffle control used by the DMA loop to position the carry produced
   by "cg" ahead of the "addx" that completes the 64-bit add.  */
	.type	__cg_pattern, @object
	.size	__cg_pattern, 16
__cg_pattern:
	.word	0x04050607, 0x80808080, 0x80808080, 0x80808080

/* One quadword holding the current overlay index.  Written by
   __ovly_return and __ovly_load, read back by __ovly_load and by the
   table update code in do_load.  */
	.type	__ovly_current, @object
	.size	__ovly_current, 16
__ovly_current:
	.space	16

/*
 * __ovly_return - stub for returning from overlay functions.
 *
 * On entry the four slots of $lr are:
 *   __ovly_return, prev ovl index, caller return addr, undefined.
 *
 * Load the previous overlay and jump to the caller return address.
 * Updates __ovly_current.
 *
 * The overlay index and return address are shifted out of $lr into
 * "ovl" and "target".  The _ovly_table entry for "ovl" is fetched and
 * the low bit of its second word is tested; when that bit is clear the
 * code branches to do_load, otherwise it jumps straight to "target".
 * save1 to save3 (and save4 under OVLY_IRQ_SAVE) are spilled here so
 * that do_load may use them.
 */
	.align	4
	.global	__ovly_return
	.type	__ovly_return, @function
__ovly_return:
	ila	tab1, _ovly_table - 16				# 0,2	0
	shlqbyi	ovl, $lr, 4					# 1,4	0
#nop
	shlqbyi	target, $lr, 8					# 1,4	1
#nop; lnop
#nop; lnop
	shli	off1, ovl, 4					# 0,4	4
#lnop
#nop
	hbr	ovly_ret9, target				# 1,15	5
#nop; lnop
#nop; lnop
#nop
	lqx	vma, tab1, off1					# 1,6	8
#ifdef OVLY_IRQ_SAVE
	nop
	stqd	save4, -64($sp)					# 1,6	9
#else
#nop; lnop
#endif
#nop; lnop
#nop; lnop
#nop; lnop
#nop; lnop
#nop
	rotqbyi	size1, vma, 4					# 1,4	14
#nop
	stqd	save3, -48($sp)					# 1,6	15
#nop
	stqd	save2, -32($sp)					# 1,6	16
#nop
	stqd	save1, -16($sp)					# 1,6	17
	andi	present1, size1, 1				# 0,2	18
	stqr	ovl, __ovly_current				# 1,6	18
#nop; lnop
#nop
	brz	present1, do_load				# 1,4	20
ovly_ret9:
#nop
	bi	target						# 1,4	21

/*
 * __ovly_load - copy an overlay partition to local store.
 *
 * On entry $75 points to a word consisting of the overlay index in
 * the top 14 bits, and the target address in the bottom 18 bits.
 *
 * Sets up $lr to return via __ovly_return.  If $lr is already set
 * to return via __ovly_return, don't change it.  In that case we
 * have a tail call from one overlay function to another.
 * Updates __ovly_current.
 *
 * Two entry sequences are provided, selected by OVL_STUB_SIZE.  The
 * 8-byte variant loads the word addressed by "parm" and splits it into
 * "ovl" and "target" itself.  The 16-byte variant reads "ovl" and
 * "target" without setting them, so they are expected to be loaded
 * already (presumably by the stub -- confirm against the stub
 * generator).  Both variants then join the common tail below.
 */
	.align	3
	.global	__ovly_load
	.type	__ovly_load, @function
__ovly_load:
#if OVL_STUB_SIZE == 8
########
#nop
	lqd	target, 0(parm)					# 1,6	-11
#nop; lnop
#nop; lnop
#nop; lnop
#nop; lnop
#nop; lnop
#nop
	rotqby	target, target, parm				# 1,4	-5
	ila	tab2, _ovly_table - 16				# 0,2	-4
	stqd	save3, -48($sp)					# 1,6	-4
#nop
	stqd	save2, -32($sp)					# 1,6	-3
#nop
	stqd	save1, -16($sp)					# 1,6	-2
	rotmi	ovl, target, -18				# 0,4	-1
	hbr	ovly_load9, target				# 1,15	-1
	ila	rv1, __ovly_return				# 0,2	0
#lnop
#nop; lnop
#nop
	lqr	cur, __ovly_current				# 1,6	2
	shli	off2, ovl, 4					# 0,4	3
	stqr	ovl, __ovly_current				# 1,6	3
	ceq	rv2, $lr, rv1					# 0,2	4
	lqr	rv3, __rv_pattern				# 1,6	4
#nop; lnop
#nop; lnop
#nop
	lqx	vma, tab2, off2					# 1,6	7
########
#else /* OVL_STUB_SIZE == 16 */
########
	ila	tab2, _ovly_table - 16				# 0,2	0
	stqd	save3, -48($sp)					# 1,6	0
	ila	rv1, __ovly_return				# 0,2	1
	stqd	save2, -32($sp)					# 1,6	1
	shli	off2, ovl, 4					# 0,4	2
	lqr	cur, __ovly_current				# 1,6	2
	nop
	stqr	ovl, __ovly_current				# 1,6	3
	ceq	rv2, $lr, rv1					# 0,2	4
	lqr	rv3, __rv_pattern				# 1,6	4
#nop
	hbr	ovly_load9, target				# 1,15	5
#nop
	lqx	vma, tab2, off2					# 1,6	6
#nop
	stqd	save1, -16($sp)					# 1,6	7
########
#endif

/* Common tail.  Build the new $lr in rv7 from __ovly_return, the
   previous __ovly_current value and the old $lr, then keep the old $lr
   instead where rv5 (expanded from the ceq result) selects it.  The
   save registers are reloaded on the way, and do_load is entered when
   the low bit of the table entry's second word is clear.  */
#nop; lnop
#nop; lnop
#nop
	shufb	rv4, rv1, cur, rv3				# 1,4	10
#nop
	fsmb	rv5, rv2					# 1,4	11
#nop
	rotqmbyi rv6, $lr, -8					# 1,4	12
#nop
	rotqbyi	size2, vma, 4					# 1,4	13
#nop
	lqd	save3, -48($sp)					# 1,6	14
#nop; lnop
	or	rv7, rv4, rv6					# 0,2	16
	lqd	save2, -32($sp)					# 1,6	16
	andi	present2, size2, 1				# 0,2	17
#ifdef OVLY_IRQ_SAVE
	stqd	save4, -64($sp)					# 1,6	17
#else
	lnop							# 1,0	17
#endif
	selb	$lr, rv7, $lr, rv5				# 0,2	18
	lqd	save1, -16($sp)					# 1,6	18
#nop
	brz	present2, do_load				# 1,4	19
ovly_load9:
#nop
	bi	target						# 1,4	20

/* If we get here, we are about to load a new overlay.
 * "vma" contains the relevant entry from _ovly_table[].
 *	extern struct {
 *		u32 vma;
 *		u32 size;
 *		u32 file_offset;
 *		u32 buf;
 *	} _ovly_table[];
 *
 * With OVLY_IRQ_SAVE defined, the machine status channel is read into
 * irq_stat and execution continues at do_load10 through "bid"; bit 0
 * of irq_stat is tested again before the final branch.
 */
	.align	3
	.global	__ovly_load_event
	.type	__ovly_load_event, @function
__ovly_load_event:
do_load:
#ifdef OVLY_IRQ_SAVE
	ila	irqtmp1, do_load10				# 0,2	-5
	rotqbyi	sz, vma, 8					# 1,4	-5
#nop
	rdch	irq_stat, $SPU_RdMachStat			# 1,6	-4
#nop
	bid	irqtmp1						# 1,4	-3
do_load10:
	nop
#else
#nop
	rotqbyi	sz, vma, 8					# 1,4	0
#endif
	rotqbyi	osize, vma, 4					# 1,4	1
#nop
	lqa	ea64, _EAR_					# 1,6	2
#nop
	lqr	cgshuf, __cg_pattern				# 1,6	3

/* We could predict the branch at the end of this loop by adding a few
   instructions, and there are plenty of free cycles to do so without
   impacting loop execution time.  However, it doesn't make a great
   deal of sense since we need to wait for the dma to complete anyway.  */

/* Each pass advances ea64 by the amount held in sz (the file offset on
   the first pass, the previous chunk size afterwards), then queues one
   MFC get of min(osize, MFC_MAX_DMA_SIZE) bytes to local store address
   vma.  osize and vma are updated by the chunk size and the loop
   repeats until osize reaches zero.  */
__ovly_xfer_loop:
#nop
	rotqmbyi off64, sz, -4					# 1,4	4
#nop; lnop
#nop; lnop
#nop; lnop
	cg	cgbits, ea64, off64				# 0,2	8
#lnop
#nop; lnop
#nop
	shufb	add64, cgbits, cgbits, cgshuf			# 1,4	10
#nop; lnop
#nop; lnop
#nop; lnop
	addx	add64, ea64, off64				# 0,2	14
#lnop
	ila	maxsize, MFC_MAX_DMA_SIZE			# 0,2	15
	lnop
	ori	ea64, add64, 0					# 0,2	16
	rotqbyi	ealo, add64, 4					# 1,4	16
	cgt	cmp, osize, maxsize				# 0,2	17
	wrch	$MFC_LSA, vma					# 1,6	17
#nop; lnop
	selb	sz, osize, maxsize, cmp				# 0,2	19
	wrch	$MFC_EAH, ea64					# 1,6	19
	ila	tagid, MFC_TAG_ID				# 0,2	20
	wrch	$MFC_EAL, ealo					# 1,6	20
	ila	cmd, MFC_GET_CMD				# 0,2	21
	wrch	$MFC_Size, sz					# 1,6	21
	sf	osize, sz, osize				# 0,2	22
	wrch	$MFC_TagId, tagid				# 1,6	22
	a	vma, vma, sz					# 0,2	23
	wrch	$MFC_Cmd, cmd					# 1,6	23
#nop
	brnz	osize, __ovly_xfer_loop				# 1,4	24

/* Now update our data structures while waiting for DMA to complete.
   Low bit of .size needs to be cleared on the _ovly_table entry
   corresponding to the evicted overlay, and set on the entry for the
   newly loaded overlay.  Note that no overlay may in fact be evicted
   as _ovly_buf_table[] starts with all zeros.  Don't zap .size entry
   for zero index!  Also of course update the _ovly_buf_table entry.  */
#nop
	lqr	newovl, __ovly_current				# 1,6	25
#nop; lnop
#nop; lnop
#nop; lnop
#nop; lnop
#nop; lnop
	shli	off3, newovl, 4					# 0,4	31
#lnop
	ila	tab3, _ovly_table - 16				# 0,2	32
#lnop
#nop
	fsmbi	pbyte, 0x100					# 1,4	33
#nop; lnop
#nop
	lqx	vma, tab3, off3					# 1,6	35
#nop; lnop
	andi	pbit, pbyte, 1					# 0,2	37
	lnop
#nop; lnop
#nop; lnop
#nop; lnop
	or	newvma, vma, pbit				# 0,2	41
	rotqbyi	buf3, vma, 12					# 1,4	41
#nop; lnop
#nop
	stqx	newvma, tab3, off3				# 1,6	43
#nop; lnop
	shli	off4, buf3, 2					# 1,4	45
#lnop
	ila	tab4, _ovly_buf_table - 4			# 0,2	46
#lnop
#nop; lnop
#nop; lnop
#nop
	lqx	map, tab4, off4					# 1,6	49
#nop
	cwx	genwi, tab4, off4				# 1,4	50
	a	addr4, tab4, off4				# 0,2	51
#lnop
#nop; lnop
#nop; lnop
#nop; lnop
#nop
	rotqby	oldovl, map, addr4				# 1,4	55
#nop
	shufb	newmap, newovl, map, genwi			# 0,4	56
#if MFC_TAG_ID < 16
	ila	newmask, 1 << MFC_TAG_ID			# 0,2	57
#else
	ilhu	newmask, 1 << (MFC_TAG_ID - 16)			# 0,2	57
#endif
#lnop
#nop; lnop
#nop; lnop
	stqd	newmap, 0(addr4)				# 1,6	60

/* Save app's tagmask, wait for DMA complete, restore mask.  */
	ila	tagstat, MFC_TAG_UPDATE_ALL			# 0,2	61
	rdch	oldmask, $MFC_RdTagMask				# 1,6	61
#nop
	wrch	$MFC_WrTagMask, newmask				# 1,6	62
#nop
	wrch	$MFC_WrTagUpdate, tagstat			# 1,6	63
#nop
	rdch	tagstat, $MFC_RdTagStat				# 1,6	64
#nop
	sync							# 1,4	65

/* Any hint prior to the sync is lost.  A hint here allows the branch
   to complete 15 cycles after the hint.  With no hint the branch will
   take 18 or 19 cycles.  */
	ila	tab5, _ovly_table - 16				# 0,2	66
	hbr	do_load99, target				# 1,15	66
	shli	off5, oldovl, 4					# 0,4	67
	wrch	$MFC_WrTagMask, oldmask				# 1,6	67
/* zovl becomes an all-ones mask when the evicted index is zero, which
   clears pbit so that the table slot addressed for index zero is
   written back unchanged.  */
	ceqi	zovl, oldovl, 0					# 0,2	68
#lnop
#nop; lnop
#nop
	fsm	zovl, zovl					# 1,4	70
#nop
	lqx	oldvma, tab5, off5				# 1,6	71
#nop
	lqd	save3, -48($sp)					# 1,6	72
#nop; lnop
	andc	pbit, pbit, zovl				# 0,2	74
	lqd	save2, -32($sp)					# 1,6	74
#ifdef OVLY_IRQ_SAVE
	ila	irqtmp2, do_load90				# 0,2	75
#lnop
	andi	irq_stat, irq_stat, 1				# 0,2	76
#lnop
#else
#nop; lnop
#nop; lnop
#endif
	andc	oldvma, oldvma, pbit				# 0,2	77
	lqd	save1, -16($sp)					# 1,6	77
	nop							# 0,0	78
#lnop
#nop
	stqx	oldvma, tab5, off5				# 1,6	79
#nop
#ifdef OVLY_IRQ_SAVE
	binze	irq_stat, irqtmp2				# 1,4	80
do_load90:
#nop
	lqd	save4, -64($sp)					# 1,6	84
#else
#nop; lnop
#endif

/* Global label placed on a single nop immediately before the final
   branch (presumably a hook for a debugger breakpoint -- confirm).  */
	.global	_ovly_debug_event
	.type	_ovly_debug_event, @function
_ovly_debug_event:
	nop
/* Branch to target address.  */
do_load99:
	bi	target						# 1,4	81/85

	.size	__ovly_load, . - __ovly_load
