OpenCores
URL https://opencores.org/ocsvn/openrisc_me/openrisc_me/trunk

Subversion Repositories openrisc_me

[/] [openrisc/] [trunk/] [gnu-src/] [binutils-2.18.50/] [ld/] [emultempl/] [spu_ovl.S] - Blame information for rev 318

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 38 julius
/* Overlay manager for SPU.
2
 
3
   Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
4
 
5
   This file is part of the GNU Binutils.
6
 
7
   This program is free software; you can redistribute it and/or modify
8
   it under the terms of the GNU General Public License as published by
9
   the Free Software Foundation; either version 3 of the License, or
10
   (at your option) any later version.
11
 
12
   This program is distributed in the hope that it will be useful,
13
   but WITHOUT ANY WARRANTY; without even the implied warranty of
14
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
   GNU General Public License for more details.
16
 
17
   You should have received a copy of the GNU General Public License
18
   along with this program; if not, write to the Free Software
19
   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
20
   MA 02110-1301, USA.  */
21
 
22
/* MFC DMA definitions.  These constants are written to the MFC
   channels by the DMA code in do_load / __ovly_xfer_loop.  */
23
/* Value written to $MFC_Cmd to start each transfer.  */
#define MFC_GET_CMD             0x40
24
/* Upper bound on the size written to $MFC_Size for one transfer;
   __ovly_xfer_loop splits larger overlays into several transfers.  */
#define MFC_MAX_DMA_SIZE        0x4000
25
/* Value written to $MFC_WrTagUpdate before reading $MFC_RdTagStat.  */
#define MFC_TAG_UPDATE_ALL      2
26
/* Tag written to $MFC_TagId; 1 << MFC_TAG_ID is also used as the tag
   mask while waiting for the transfer to finish.  */
#define MFC_TAG_ID              0
27
 
28
/* Register usage.
   Each physical register below is given several alias names; the code
   uses whichever alias describes the value the register holds at that
   point, so two aliases of the same register are never live together.
   $75-$79 (reserved1-5) are used without being saved.  $72-$74
   (save1-3) are spilled to -16/-32/-48($sp) before being used as
   scratch and are reloaded from those slots afterwards.  */
29
#define reserved1       $75
30
/* Same physical register as reserved1.  */
#define parm            $75
31
#define tab1            reserved1
32
#define tab2            reserved1
33
#define vma             reserved1
34
#define oldvma          reserved1
35
#define newmask         reserved1
36
#define map             reserved1
37
 
38
#define reserved2       $76
39
#define off1            reserved2
40
#define off2            reserved2
41
#define present1        reserved2
42
#define present2        reserved2
43
#define sz              reserved2
44
#define cmp             reserved2
45
#define add64           reserved2
46
#define cgbits          reserved2
47
#define off3            reserved2
48
#define off4            reserved2
49
#define addr4           reserved2
50
#define off5            reserved2
51
#define tagstat         reserved2
52
 
53
#define reserved3       $77
54
#define size1           reserved3
55
#define size2           reserved3
56
#define rv3             reserved3
57
#define ealo            reserved3
58
#define cmd             reserved3
59
#define off64           reserved3
60
#define tab3            reserved3
61
#define tab4            reserved3
62
#define tab5            reserved3
63
 
64
#define reserved4       $78
65
#define ovl             reserved4
66
#define rv2             reserved4
67
#define rv5             reserved4
68
#define cgshuf          reserved4
69
#define newovl          reserved4
70
 
71
/* Holds the branch target for the final "bi target" on every path.  */
#define reserved5       $79
72
#define target          reserved5
73
 
74
/* Spilled to / reloaded from -16($sp).  */
#define save1           $72
75
#define rv4             save1
76
#define rv7             save1
77
#define tagid           save1
78
#define maxsize         save1
79
#define pbyte           save1
80
#define pbit            save1
81
 
82
/* Spilled to / reloaded from -32($sp).  */
#define save2           $73
83
#define cur             save2
84
#define rv6             save2
85
#define osize           save2
86
#define zovl            save2
87
#define oldovl          save2
88
#define newvma          save2
89
 
90
/* Spilled to / reloaded from -48($sp).  */
#define save3           $74
91
#define rv1             save3
92
#define ea64            save3
93
#define buf3            save3
94
#define genwi           save3
95
#define newmap          save3
96
#define oldmask         save3
97
 
98
 
99
        .text
100
        .align  4
101
        /* __rv_pattern: 16-byte control word loaded into rv3 by
           __ovly_load and used as the shufb selector that combines
           rv1 (address of __ovly_return) with cur (the previous
           __ovly_current) when building the new $lr.  */
        .type   __rv_pattern, @object
102
        .size   __rv_pattern, 16
103
__rv_pattern:
104
        .word   0x00010203, 0x10111213, 0x80808080, 0x80808080
105
 
106
        /* __cg_pattern: 16-byte control word loaded into cgshuf by
           do_load and used as the shufb selector applied to the carry
           bits produced by cg, ahead of the addx that advances ea64.  */
        .type   __cg_pattern, @object
107
        .size   __cg_pattern, 16
108
__cg_pattern:
109
        .word   0x04050607, 0x80808080, 0x80808080, 0x80808080
110
 
111
        /* __ovly_current: quadword holding the current overlay index.
           Written with "stqr ovl" by __ovly_return and __ovly_load, and
           read back as cur (in __ovly_load) and newovl (in do_load).  */
        .type   __ovly_current, @object
112
        .size   __ovly_current, 16
113
__ovly_current:
114
        .space  16
115
 
116
/*
117
 * __ovly_return - stub for returning from overlay functions.
118
 *
119
 * On entry the four slots of $lr are:
120
 *   __ovly_return, prev ovl index, caller return addr, undefined.
121
 *
122
 * Load the previous overlay and jump to the caller return address.
123
 * Updates __ovly_current.
 *
 * If the low bit of the overlay's size word in _ovly_table is set the
 * code branches directly to the return address; otherwise it goes
 * through do_load first.  save1-save3 are spilled here because do_load
 * uses them as scratch and reloads them from the same stack slots.
 *
 * NOTE(review): the trailing "# p,l  c" annotations look like issue
 * pipeline, latency and scheduled cycle, and the "#nop" / "#lnop"
 * lines look like commented-out padding marking empty issue slots --
 * confirm before rescheduling anything.
124
 */
125
        .align  4
126
        .global __ovly_return
127
        .type   __ovly_return, @function
128
__ovly_return:
129
        /* Table base biased by one 16-byte entry; the index is scaled
           by 16 below (shli ..., 4) and added to this base.  */
        ila     tab1, _ovly_table - 16                          # 0,2   0
130
        /* Shift $lr left 4 bytes: slot 1 (prev ovl index) -> slot 0.  */
        shlqbyi ovl, $lr, 4                                     # 1,4   0
131
#nop
132
        /* Shift $lr left 8 bytes: slot 2 (return addr) -> slot 0.  */
        shlqbyi target, $lr, 8                                  # 1,4   1
133
#nop; lnop
134
#nop; lnop
135
        /* off1 = ovl * 16, the byte offset of the table entry.  */
        shli    off1, ovl, 4                                    # 0,4   4
136
#lnop
137
#nop
138
        /* Branch hint for the "bi target" at ovly_ret9.  */
        hbr     ovly_ret9, target                               # 1,15  5
139
#nop; lnop
140
#nop; lnop
141
#nop
142
        /* Load the 16-byte _ovly_table entry for this overlay.  */
        lqx     vma, tab1, off1                                 # 1,6   8
143
#nop; lnop
144
#nop; lnop
145
#nop; lnop
146
#nop; lnop
147
#nop; lnop
148
#nop
149
        /* Rotate left 4 bytes: the entry's second word (size) -> slot 0.  */
        rotqbyi size1, vma, 4                                   # 1,4   14
150
#nop
151
        stqd    save3, -48($sp)                                 # 1,6   15
152
#nop
153
        stqd    save2, -32($sp)                                 # 1,6   16
154
#nop
155
        stqd    save1, -16($sp)                                 # 1,6   17
156
        /* Isolate the low bit of the size word (the "present" flag).  */
        andi    present1, size1, 1                              # 0,2   18
157
        stqr    ovl, __ovly_current                             # 1,6   18
158
#nop; lnop
159
#nop
160
        /* Flag clear: the overlay must be loaded first.  */
        brz     present1, do_load                               # 1,4   20
161
ovly_ret9:
162
#nop
163
        bi      target                                          # 1,4   21
164
 
165
/*
166
 * __ovly_load - copy an overlay partition to local store.
167
 *
168
 * On entry $75 points to a word consisting of the overlay index in
169
 * the top 14 bits, and the target address in the bottom 18 bits.
170
 *
171
 * Sets up $lr to return via __ovly_return.
172
 * Updates __ovly_current.
 *
 * Two entry sequences are provided, selected by OVL_STUB_SIZE.  The
 * 8-byte variant decodes ovl and target from the word addressed by
 * parm ($75).  The other variant reads ovl and target without setting
 * them -- presumably the 16-byte stub loads them before branching
 * here; the stub is not visible in this file, so confirm.
 *
 * Both variants spill save1-save3, record the new overlay index in
 * __ovly_current, and leave the overlay's _ovly_table entry in vma.
 * The common tail rebuilds $lr, reloads save1-save3, and either
 * branches to target or falls into do_load when the low bit of the
 * entry's size word is clear.
173
 */
174
        .align  3
175
        .global __ovly_load
176
        .type   __ovly_load, @function
177
__ovly_load:
178
#if OVL_STUB_SIZE == 8
179
########
180
#nop
181
        /* Load the quadword containing the word parm points at.  */
        lqd     target, 0(parm)                                 # 1,6   -11
182
#nop; lnop
183
#nop; lnop
184
#nop; lnop
185
#nop; lnop
186
#nop; lnop
187
#nop
188
        /* Rotate by the address in parm to bring that word to slot 0.  */
        rotqby  target, target, parm                            # 1,4   -5
189
        ila     tab2, _ovly_table - 16                          # 0,2   -4
190
        stqd    save3, -48($sp)                                 # 1,6   -4
191
#nop
192
        stqd    save2, -32($sp)                                 # 1,6   -3
193
#nop
194
        stqd    save1, -16($sp)                                 # 1,6   -2
195
        /* ovl = target >> 18: the overlay index from the top 14 bits.  */
        rotmi   ovl, target, -18                                # 0,4   -1
196
        hbr     ovly_load9, target                              # 1,15  -1
197
        ila     rv1, __ovly_return                              # 0,2   0
198
#lnop
199
#nop; lnop
200
#nop
201
        /* Fetch the previous overlay index before overwriting it.  */
        lqr     cur, __ovly_current                             # 1,6   2
202
        shli    off2, ovl, 4                                    # 0,4   3
203
        stqr    ovl, __ovly_current                             # 1,6   3
204
        /* Per-word compare of $lr against the address of __ovly_return.  */
        ceq     rv2, $lr, rv1                                   # 0,2   4
205
        lqr     rv3, __rv_pattern                               # 1,6   4
206
#nop; lnop
207
#nop; lnop
208
#nop
209
        lqx     vma, tab2, off2                                 # 1,6   7
210
########
211
#else /* OVL_STUB_SIZE == 16 */
212
########
213
        ila     tab2, _ovly_table - 16                          # 0,2   0
214
        stqd    save3, -48($sp)                                 # 1,6   0
215
        ila     rv1, __ovly_return                              # 0,2   1
216
        stqd    save2, -32($sp)                                 # 1,6   1
217
        /* ovl is read here as an input; it is not set in this variant.  */
        shli    off2, ovl, 4                                    # 0,4   2
218
        lqr     cur, __ovly_current                             # 1,6   2
219
        nop
220
        stqr    ovl, __ovly_current                             # 1,6   3
221
        ceq     rv2, $lr, rv1                                   # 0,2   4
222
        lqr     rv3, __rv_pattern                               # 1,6   4
223
#nop
224
        hbr     ovly_load9, target                              # 1,15  5
225
#nop
226
        lqx     vma, tab2, off2                                 # 1,6   6
227
#nop
228
        stqd    save1, -16($sp)                                 # 1,6   7
229
########
230
#endif
231
 
232
#nop; lnop
233
#nop; lnop
234
#nop
235
        /* Common tail.  rv4 = bytes of rv1 and cur combined as directed
           by __rv_pattern (rv3).  */
        shufb   rv4, rv1, cur, rv3                              # 1,4   10
236
#nop
237
        /* Expand the ceq result into a byte mask for the selb below.  */
        fsmb    rv5, rv2                                        # 1,4   11
238
#nop
239
        /* Shift the incoming $lr right by 8 bytes.  */
        rotqmbyi rv6, $lr, -8                                   # 1,4   12
240
#nop
241
        /* Size word of the table entry -> slot 0.  */
        rotqbyi size2, vma, 4                                   # 1,4   13
242
#nop
243
        /* save3 (rv1) is no longer needed: reload it.  */
        lqd     save3, -48($sp)                                 # 1,6   14
244
#nop; lnop
245
        /* rv7 = candidate new $lr: rv4 merged with the shifted old $lr.  */
        or      rv7, rv4, rv6                                   # 0,2   16
246
        lqd     save2, -32($sp)                                 # 1,6   16
247
        andi    present2, size2, 1                              # 0,2   17
248
        lnop                                                    # 1,0   17
249
        /* Keep the old $lr where the rv5 mask is set, else take rv7.  */
        selb    $lr, rv7, $lr, rv5                              # 0,2   18
250
        lqd     save1, -16($sp)                                 # 1,6   18
251
#nop
252
        /* Present flag clear: the overlay must be loaded first.  */
        brz     present2, do_load                               # 1,4   19
253
ovly_load9:
254
#nop
255
        bi      target                                          # 1,4   20
256
 
257
/* If we get here, we are about to load a new overlay.
258
 * "vma" contains the relevant entry from _ovly_table[].
259
 *      extern struct {
260
 *              u32 vma;
261
 *              u32 size;
262
 *              u32 file_offset;
263
 *              u32 buf;
264
 *      } _ovly_table[];
 *
 * Entered by branch from __ovly_return and __ovly_load with "target"
 * holding the final branch address.  save1-save3 are used as scratch
 * and reloaded from -16/-32/-48($sp) before the final "bi target".
 *
 * Steps: issue MFC transfers of at most MFC_MAX_DMA_SIZE bytes each
 * until the whole overlay is queued; update _ovly_table and
 * _ovly_buf_table; wait for the transfers using tag MFC_TAG_ID; then
 * branch to target.
265
 */
266
        .align  3
267
        .global __ovly_load_event
268
        .type   __ovly_load_event, @function
269
__ovly_load_event:
270
do_load:
271
#nop
272
        /* Rotate left 8 bytes: file_offset -> slot 0.  On the first
           pass through the loop "sz" is therefore the file offset.  */
        rotqbyi sz, vma, 8                                      # 1,4   0
273
#nop
274
        /* Rotate left 4 bytes: size -> slot 0 (bytes still to move).  */
        rotqbyi osize, vma, 4                                   # 1,4   1
275
#nop
276
        /* Load the quadword at _EAR_ -- presumably the 64-bit effective
           address of the overlay image; _EAR_ is defined elsewhere.  */
        lqa     ea64, _EAR_                                     # 1,6   2
277
#nop
278
        lqr     cgshuf, __cg_pattern                            # 1,6   3
279
 
280
/* We could predict the branch at the end of this loop by adding a few
281
   instructions, and there are plenty of free cycles to do so without
282
   impacting loop execution time.  However, it doesn't make a great
283
   deal of sense since we need to wait for the dma to complete anyway.  */
/* Each pass advances ea64 by "sz" (the file offset on the first pass,
   the previous chunk size afterwards), then queues one transfer of
   min(osize, MFC_MAX_DMA_SIZE) bytes to local-store address vma.  */
284
__ovly_xfer_loop:
285
#nop
286
        /* Shift right 4 bytes so sz lines up with the low word of ea64.  */
        rotqmbyi off64, sz, -4                                  # 1,4   4
287
#nop; lnop
288
#nop; lnop
289
#nop; lnop
290
        /* cg / shufb / addx together add off64 to ea64, with the carry
           bits from cg fed into addx after the shuffle.  */
        cg      cgbits, ea64, off64                             # 0,2   8
291
#lnop
292
#nop; lnop
293
#nop
294
        shufb   add64, cgbits, cgbits, cgshuf                   # 1,4   10
295
#nop; lnop
296
#nop; lnop
297
#nop; lnop
298
        addx    add64, ea64, off64                              # 0,2   14
299
#lnop
300
        ila     maxsize, MFC_MAX_DMA_SIZE                       # 0,2   15
301
        lnop
302
        /* Copy the sum back into ea64 for the next pass.  */
        ori     ea64, add64, 0                                  # 0,2   16
303
        /* Second word of the sum -> slot 0, for $MFC_EAL.  */
        rotqbyi ealo, add64, 4                                  # 1,4   16
304
        cgt     cmp, osize, maxsize                             # 0,2   17
305
        wrch    $MFC_LSA, vma                                   # 1,6   17
306
#nop; lnop
307
        /* sz = maxsize if osize > maxsize, else osize.  */
        selb    sz, osize, maxsize, cmp                         # 0,2   19
308
        wrch    $MFC_EAH, ea64                                  # 1,6   19
309
        ila     tagid, MFC_TAG_ID                               # 0,2   20
310
        wrch    $MFC_EAL, ealo                                  # 1,6   20
311
        ila     cmd, MFC_GET_CMD                                # 0,2   21
312
        wrch    $MFC_Size, sz                                   # 1,6   21
313
        /* osize -= sz.  */
        sf      osize, sz, osize                                # 0,2   22
314
        wrch    $MFC_TagId, tagid                               # 1,6   22
315
        /* Advance the local-store destination by the chunk size.  */
        a       vma, vma, sz                                    # 0,2   23
316
        wrch    $MFC_Cmd, cmd                                   # 1,6   23
317
#nop
318
        brnz    osize, __ovly_xfer_loop                         # 1,4   24
319
 
320
/* Now update our data structures while waiting for DMA to complete.
321
   Low bit of .size needs to be cleared on the _ovly_table entry
322
   corresponding to the evicted overlay, and set on the entry for the
323
   newly loaded overlay.  Note that no overlay may in fact be evicted
324
   as _ovly_buf_table[] starts with all zeros.  Don't zap .size entry
325
   for zero index!  Also of course update the _ovly_buf_table entry.  */
326
#nop
327
        /* The overlay index was stored by the caller path; vma was
           modified by the loop, so the table entry is reloaded below.  */
        lqr     newovl, __ovly_current                          # 1,6   25
328
#nop; lnop
329
#nop; lnop
330
#nop; lnop
331
#nop; lnop
332
#nop; lnop
333
        shli    off3, newovl, 4                                 # 0,4   31
334
#lnop
335
        ila     tab3, _ovly_table - 16                          # 0,2   32
336
#lnop
337
#nop
338
        /* fsmbi + andi build pbit: a quadword whose only set bit is the
           low bit of the second word, i.e. the low bit of .size.  */
        fsmbi   pbyte, 0x100                                    # 1,4   33
339
#nop; lnop
340
#nop
341
        lqx     vma, tab3, off3                                 # 1,6   35
342
#nop; lnop
343
        andi    pbit, pbyte, 1                                  # 0,2   37
344
        lnop
345
#nop; lnop
346
#nop; lnop
347
#nop; lnop
348
        /* Set the low bit of .size on the new overlay's entry.  */
        or      newvma, vma, pbit                               # 0,2   41
349
        /* Rotate left 12 bytes: buf -> slot 0.  */
        rotqbyi buf3, vma, 12                                   # 1,4   41
350
#nop; lnop
351
#nop
352
        stqx    newvma, tab3, off3                              # 1,6   43
353
#nop; lnop
354
        /* off4 = buf * 4; with the -4 bias on tab4 this addresses one
           32-bit word of _ovly_buf_table.  */
        shli    off4, buf3, 2                                   # 1,4   45
355
#lnop
356
        ila     tab4, _ovly_buf_table - 4                       # 0,2   46
357
#lnop
358
#nop; lnop
359
#nop; lnop
360
#nop
361
        /* Load the quadword that contains that word.  */
        lqx     map, tab4, off4                                 # 1,6   49
362
#nop
363
        /* Generate the word-insert control for address tab4 + off4.  */
        cwx     genwi, tab4, off4                               # 1,4   50
364
        a       addr4, tab4, off4                               # 0,2   51
365
#lnop
366
#nop; lnop
367
#nop; lnop
368
#nop; lnop
369
#nop
370
        /* Rotate the old word to slot 0: the overlay index previously
           recorded for this buffer (zero if none).  */
        rotqby  oldovl, map, addr4                              # 1,4   55
371
#nop
372
        /* Insert newovl into the quadword at the addressed word.  */
        shufb   newmap, newovl, map, genwi                      # 0,4   56
373
#if MFC_TAG_ID < 16
374
        ila     newmask, 1 << MFC_TAG_ID                        # 0,2   57
375
#else
376
        ilhu    newmask, 1 << (MFC_TAG_ID - 16)                 # 0,2   57
377
#endif
378
#lnop
379
#nop; lnop
380
#nop; lnop
381
        stqd    newmap, 0(addr4)                                # 1,6   60
382
 
383
/* Save app's tagmask, wait for DMA complete, restore mask.  */
384
        ila     tagstat, MFC_TAG_UPDATE_ALL                     # 0,2   61
385
        rdch    oldmask, $MFC_RdTagMask                         # 1,6   61
386
#nop
387
        wrch    $MFC_WrTagMask, newmask                         # 1,6   62
388
#nop
389
        wrch    $MFC_WrTagUpdate, tagstat                       # 1,6   63
390
#nop
391
        /* The value read is not used afterwards; the read is the wait.  */
        rdch    tagstat, $MFC_RdTagStat                         # 1,6   64
392
#nop
393
        sync                                                    # 1,4   65
394
/* Any hint prior to the sync is lost.  A hint here allows the branch
395
   to complete 15 cycles after the hint.  With no hint the branch will
396
   take 18 or 19 cycles.  */
397
        ila     tab5, _ovly_table - 16                          # 0,2   66
398
        hbr     do_load99, target                               # 1,15  66
399
        /* Byte offset of the evicted overlay's table entry.  */
        shli    off5, oldovl, 4                                 # 0,4   67
400
        wrch    $MFC_WrTagMask, oldmask                         # 1,6   67
401
        /* zovl = all ones when no overlay was evicted (oldovl == 0).  */
        ceqi    zovl, oldovl, 0                                 # 0,2   68
402
#lnop
403
#nop; lnop
404
#nop
405
        /* Expand that result into a mask for the andc below.  */
        fsm     zovl, zovl                                      # 1,4   70
406
#nop
407
        lqx     oldvma, tab5, off5                              # 1,6   71
408
#nop
409
        /* Start reloading the caller's save registers; each reload
           follows the last use of that register's aliases.  */
        lqd     save3, -48($sp)                                 # 1,6   72
410
#nop; lnop
411
        /* Drop the bit to clear when oldovl was zero, so the entry at
           index zero is written back unchanged.  */
        andc    pbit, pbit, zovl                                # 0,2   74
412
        lqd     save2, -32($sp)                                 # 1,6   74
413
#nop; lnop
414
#nop; lnop
415
        /* Clear the low bit of .size on the evicted overlay's entry.  */
        andc    oldvma, oldvma, pbit                            # 0,2   77
416
        lqd     save1, -16($sp)                                 # 1,6   77
417
#nop; lnop
418
        nop
419
        stqx    oldvma, tab5, off5                              # 1,6   79
420
#nop; lnop
421
 
422
        /* Global label placed on a single nop just before the final
           branch -- presumably a breakpoint location for a debugger;
           confirm against the debugger's overlay support.  */
        .global _ovly_debug_event
423
        .type   _ovly_debug_event, @function
424
_ovly_debug_event:
425
        nop
426
/* Branch to target address. */
427
do_load99:
428
        bi      target                                          # 1,4   81
429
 
430
        /* Size of __ovly_load covers everything from its label to here,
           including do_load.  */
        .size   __ovly_load, . - __ovly_load

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.