OpenCores Subversion repository: openrisc
URL: https://opencores.org/ocsvn/openrisc/openrisc/trunk
File: openrisc/trunk/gnu-dev/or1k-gcc/libgcc/config/spu/cachemgr.c (rev 734, jeremybenn)

/* Copyright (C) 2008, 2009 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#include <spu_mfcio.h>
#include <spu_internals.h>
#include <spu_intrinsics.h>
#include <spu_cache.h>

extern unsigned long long __ea_local_store;
extern char __cache_tag_array_size;

#define LINE_SIZE 128
#define TAG_MASK (LINE_SIZE - 1)

#define WAYS 4
#define SET_MASK ((int) &__cache_tag_array_size - LINE_SIZE)

#define CACHE_LINES ((int) &__cache_tag_array_size /            \
                     sizeof (struct __cache_tag_array) * WAYS)

struct __cache_tag_array
{
  unsigned int tag_lo[WAYS];
  unsigned int tag_hi[WAYS];
  void *base[WAYS];
  int reserved[WAYS];
  vector unsigned short dirty_bits[WAYS];
};
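
/* Added note (illustrative, not in the original source): the geometry is
   derived entirely from the linker-provided symbol __cache_tag_array_size,
   whose *address* encodes the size of the tag array in bytes.  With 32-bit
   local-store pointers each tag entry above is 16 + 16 + 16 + 16 + 64
   == 128 bytes, exactly LINE_SIZE.  An 8 KB tag array, for example, gives
   8192 / 128 == 64 sets, CACHE_LINES == 64 * 4 == 256 lines, and a
   256 * 128 == 32 KB data cache.  The SET_MASK indexing below relies on
   entries being line-sized, which can be checked at compile time (the
   typedef name is ours):  */
typedef char __cache_tag_entry_is_line_sized
  [sizeof (struct __cache_tag_array) == LINE_SIZE ? 1 : -1];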

extern struct __cache_tag_array __cache_tag_array[];
extern char __cache[];

/* In order to make the code seem a little cleaner, and to avoid having
   64/32 bit ifdefs all over the place, we use macros.  */

#ifdef __EA64__
typedef unsigned long long addr;

#define CHECK_TAG(_entry, _way, _tag)                   \
  ((_entry)->tag_lo[(_way)] == ((_tag) & 0xFFFFFFFF)    \
   && (_entry)->tag_hi[(_way)] == ((_tag) >> 32))

#define GET_TAG(_entry, _way) \
  ((unsigned long long)(_entry)->tag_hi[(_way)] << 32   \
   | (unsigned long long)(_entry)->tag_lo[(_way)])

#define SET_TAG(_entry, _way, _tag)                     \
  (_entry)->tag_lo[(_way)] = (_tag) & 0xFFFFFFFF;       \
  (_entry)->tag_hi[(_way)] = (_tag) >> 32

#else /* __EA32__ */
typedef unsigned long addr;

#define CHECK_TAG(_entry, _way, _tag)                   \
  ((_entry)->tag_lo[(_way)] == (_tag))

#define GET_TAG(_entry, _way)                           \
  ((_entry)->tag_lo[(_way)])

#define SET_TAG(_entry, _way, _tag)                     \
  (_entry)->tag_lo[(_way)] = (_tag)

#endif

/* In GET_ENTRY, we cast away the high 32 bits,
   as the tag is only in the low 32.  */

#define GET_ENTRY(_addr)                                                   \
  ((struct __cache_tag_array *)                                            \
   si_to_uint (si_a (si_and (si_from_uint ((unsigned int) (addr) (_addr)), \
                             si_from_uint (SET_MASK)),                     \
                     si_from_uint ((unsigned int) __cache_tag_array))))

#define GET_CACHE_LINE(_addr, _way) \
  ((void *) (__cache + ((_addr) & SET_MASK) * WAYS) + ((_way) * LINE_SIZE));
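
/* Worked example (added, assumed numbers): with an 8 KB tag array,
   SET_MASK == 8192 - 128 == 0x1F80, so EA bits [12:7] select the set.
   For ea == 0x12345680, 0x12345680 & 0x1F80 == 0x1680, i.e. set 45,
   so GET_ENTRY returns __cache_tag_array + 0x1680 bytes, and
   GET_CACHE_LINE puts way w of that set at __cache + 0x1680 * 4 + w * 128.
   The same offset indexes both arrays because a tag entry is exactly one
   line in size.  */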

#define CHECK_DIRTY(_vec) (si_to_uint (si_orx ((qword) (_vec))))
#define SET_EMPTY(_entry, _way) ((_entry)->tag_lo[(_way)] = 1)
#define CHECK_EMPTY(_entry, _way) ((_entry)->tag_lo[(_way)] == 1)

#define LS_FLAG 0x80000000
#define SET_IS_LS(_entry, _way) ((_entry)->reserved[(_way)] |= LS_FLAG)
#define CHECK_IS_LS(_entry, _way) ((_entry)->reserved[(_way)] & LS_FLAG)
#define GET_LRU(_entry, _way) ((_entry)->reserved[(_way)] & ~LS_FLAG)
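
/* Added note: each way's "reserved" word doubles as its LRU age counter.
   The fetch path increments all four counters on every lookup and zeroes
   the counter of the way that hits (preserving the MSB local-store flag),
   so on a miss the way with the largest GET_LRU value is the least
   recently used.  */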

static int dma_tag = 32;

static void
__cache_evict_entry (struct __cache_tag_array *entry, int way)
{
  addr tag = GET_TAG (entry, way);

  if (CHECK_DIRTY (entry->dirty_bits[way]) && !CHECK_IS_LS (entry, way))
    {
#ifdef NONATOMIC
      /* Non-atomic writes.  */
      unsigned int oldmask, mach_stat;
      char *line = ((void *) 0);

      /* Enter critical section.  */
      mach_stat = spu_readch (SPU_RdMachStat);
      spu_idisable ();

      /* Issue DMA request.  */
      line = GET_CACHE_LINE (entry->tag_lo[way], way);
      mfc_put (line, tag, LINE_SIZE, dma_tag, 0, 0);

      /* Wait for DMA completion.  */
      oldmask = mfc_read_tag_mask ();
      mfc_write_tag_mask (1 << dma_tag);
      mfc_read_tag_status_all ();
      mfc_write_tag_mask (oldmask);

      /* Leave critical section.  */
      if (__builtin_expect (mach_stat & 1, 0))
        spu_ienable ();
#else
      /* Allocate a buffer large enough that we know it has 128 bytes
         that are 128-byte aligned (for DMA).  */

      char buffer[LINE_SIZE + 127];
      qword *buf_ptr = (qword *) (((unsigned int) (buffer) + 127) & ~127);
      qword *line = GET_CACHE_LINE (entry->tag_lo[way], way);
      qword bits;
      unsigned int mach_stat;

      /* Enter critical section.  */
      mach_stat = spu_readch (SPU_RdMachStat);
      spu_idisable ();

      do
        {
          /* We atomically read the current memory into a buffer,
             modify the dirty bytes in the buffer, and write it
             back.  If the writeback fails, loop and try again.  */

          mfc_getllar (buf_ptr, tag, 0, 0);
          mfc_read_atomic_status ();

          /* The method we're using to write 16 dirty bytes into
             the buffer at a time uses fsmb, which in turn uses
             the least significant 16 bits of word 0, so we
             load the bits and rotate so that the first bit of
             the bitmap is in the first bit that fsmb will use.  */

          bits = (qword) entry->dirty_bits[way];
          bits = si_rotqbyi (bits, -2);

          /* si_fsmb creates the mask of dirty bytes.
             Use selb to nab the appropriate bits.  */
          buf_ptr[0] = si_selb (buf_ptr[0], line[0], si_fsmb (bits));

          /* Rotate to the next 16-byte section of the cache line.  */
          bits = si_rotqbyi (bits, 2);

          buf_ptr[1] = si_selb (buf_ptr[1], line[1], si_fsmb (bits));
          bits = si_rotqbyi (bits, 2);
          buf_ptr[2] = si_selb (buf_ptr[2], line[2], si_fsmb (bits));
          bits = si_rotqbyi (bits, 2);
          buf_ptr[3] = si_selb (buf_ptr[3], line[3], si_fsmb (bits));
          bits = si_rotqbyi (bits, 2);
          buf_ptr[4] = si_selb (buf_ptr[4], line[4], si_fsmb (bits));
          bits = si_rotqbyi (bits, 2);
          buf_ptr[5] = si_selb (buf_ptr[5], line[5], si_fsmb (bits));
          bits = si_rotqbyi (bits, 2);
          buf_ptr[6] = si_selb (buf_ptr[6], line[6], si_fsmb (bits));
          bits = si_rotqbyi (bits, 2);
          buf_ptr[7] = si_selb (buf_ptr[7], line[7], si_fsmb (bits));
          bits = si_rotqbyi (bits, 2);

          mfc_putllc (buf_ptr, tag, 0, 0);
        }
      while (mfc_read_atomic_status ());

      /* Leave critical section.  */
      if (__builtin_expect (mach_stat & 1, 0))
        spu_ienable ();
#endif
    }

  /* In any case, mark the lo tag with 1, which denotes an empty way.  */
  SET_EMPTY (entry, way);
  entry->dirty_bits[way] = (vector unsigned short) si_from_uint (0);
}
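
/* Worked example (added) of the atomic merge above: if only bytes 0..3 of
   a line are dirty, dirty_bits[way] has just its first four bits set.
   After the rotate, si_fsmb expands them to the byte mask
   0xFF 0xFF 0xFF 0xFF 0x00 ..., and si_selb takes those four bytes from
   the cached line and the other twelve from the buffer freshly loaded by
   getllar, so stores by other processors to the clean bytes of the same
   line survive the putllc writeback.  */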

void
__cache_evict (__ea void *ea)
{
  addr tag = (addr) ea & ~TAG_MASK;
  struct __cache_tag_array *entry = GET_ENTRY (ea);
  int i = 0;

  /* Cycle through all the ways the address could be cached in,
     and evict the way if found.  */

  for (i = 0; i < WAYS; i++)
    if (CHECK_TAG (entry, i, tag))
      __cache_evict_entry (entry, i);
}
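
/* Added sketch (hypothetical caller, kept out of the build with #if 0):
   dropping one line from the cache, e.g. before another processor is
   expected to rewrite that region of memory.  */
#if 0
static void
example_discard (__ea void *p)
{
  __cache_evict (p);    /* writes the line back if dirty, then empties it */
}
#endif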

static void *
__cache_fill (int way, addr tag)
{
  unsigned int oldmask, mach_stat;
  char *line = ((void *) 0);

  /* Reserve our DMA tag.  */
  if (dma_tag == 32)
    dma_tag = mfc_tag_reserve ();

  /* Enter critical section.  */
  mach_stat = spu_readch (SPU_RdMachStat);
  spu_idisable ();

  /* Issue DMA request.  */
  line = GET_CACHE_LINE (tag, way);
  mfc_get (line, tag, LINE_SIZE, dma_tag, 0, 0);

  /* Wait for DMA completion.  */
  oldmask = mfc_read_tag_mask ();
  mfc_write_tag_mask (1 << dma_tag);
  mfc_read_tag_status_all ();
  mfc_write_tag_mask (oldmask);

  /* Leave critical section.  */
  if (__builtin_expect (mach_stat & 1, 0))
    spu_ienable ();

  return (void *) line;
}
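
/* Added note: valid MFC tag-group IDs are 0..31, so dma_tag's initial
   value of 32 above means "no tag reserved yet"; the first fill obtains
   a real tag from mfc_tag_reserve and reuses it for all later DMAs.  */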

static void
__cache_miss (__ea void *ea, struct __cache_tag_array *entry, int way)
{
  addr tag = (addr) ea & ~TAG_MASK;
  unsigned int lru = 0;
  int i = 0;
  int idx = 0;

  /* If way >= WAYS, then there are no empty slots, so we must evict
     the least recently used entry.  */
  if (way >= WAYS)
    {
      for (i = 0; i < WAYS; i++)
        {
          if (GET_LRU (entry, i) > lru)
            {
              lru = GET_LRU (entry, i);
              idx = i;
            }
        }
      __cache_evict_entry (entry, idx);
      way = idx;
    }

  /* Set the empty entry's tag and fill its cache line.  */

  SET_TAG (entry, way, tag);
  entry->reserved[way] = 0;

  /* Check if the address is just an effective address within the
     SPU's local store.  */

  /* Because the LS is not 256k-aligned, we can't simply AND with a
     mask to compare, so we must check the whole range.  */

  if ((addr) ea >= (addr) __ea_local_store
      && (addr) ea < (addr) (__ea_local_store + 0x40000))
    {
      SET_IS_LS (entry, way);
      entry->base[way] =
        (void *) ((unsigned int) ((addr) ea -
                                  (addr) __ea_local_store) & ~0x7f);
    }
  else
    {
      entry->base[way] = __cache_fill (way, tag);
    }
}
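
/* Added note: when the EA falls inside this SPU's own 256 KB local store
   (mapped in the effective-address space at __ea_local_store), no copy is
   made: base[way] points straight into the local store and the IS_LS flag
   makes eviction skip writeback, since the "cached" bytes are the real
   storage.  */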

void *
__cache_fetch_dirty (__ea void *ea, int n_bytes_dirty)
{
#ifdef __EA64__
  unsigned int tag_hi;
  qword etag_hi;
#endif
  unsigned int tag_lo;
  struct __cache_tag_array *entry;

  qword etag_lo;
  qword equal;
  qword bit_mask;
  qword way;

  /* In this first chunk, we merely compute the entry pointer and tag.  */

  entry = GET_ENTRY (ea);

#ifndef __EA64__
  tag_lo =
    si_to_uint (si_andc
                (si_shufb
                 (si_from_uint ((addr) ea), si_from_uint (0),
                  si_from_uint (0x00010203)), si_from_uint (TAG_MASK)));
#else
  tag_lo =
    si_to_uint (si_andc
                (si_shufb
                 (si_from_ullong ((addr) ea), si_from_uint (0),
                  si_from_uint (0x04050607)), si_from_uint (TAG_MASK)));

  tag_hi =
    si_to_uint (si_shufb
                (si_from_ullong ((addr) ea), si_from_uint (0),
                 si_from_uint (0x00010203)));
#endif

  /* Increment LRU in reserved bytes.  */
  si_stqd (si_ai (si_lqd (si_from_ptr (entry), 48), 1),
           si_from_ptr (entry), 48);

missreturn:
  /* Check if the entry's lo_tag is equal to the address' lo_tag.  */
  etag_lo = si_lqd (si_from_ptr (entry), 0);
  equal = si_ceq (etag_lo, si_from_uint (tag_lo));
#ifdef __EA64__
  /* And the high tag too.  */
  etag_hi = si_lqd (si_from_ptr (entry), 16);
  equal = si_and (equal, (si_ceq (etag_hi, si_from_uint (tag_hi))));
#endif

  if (si_to_uint (si_orx (equal)) == 0)
    goto misshandler;

  if (n_bytes_dirty)
    {
      /* way = 0x40, 0x50, 0x60 or 0x70 for each way, which is also the
         offset of the appropriate dirty bits.  */
      way = si_shli (si_clz (si_gbb (equal)), 2);

      /* To create the bit_mask, we set it to all 1s (uint -1), then we
         shift it over (128 - n_bytes_dirty) times.  */

      bit_mask = si_from_uint (-1);

      bit_mask =
        si_shlqby (bit_mask, si_from_uint ((LINE_SIZE - n_bytes_dirty) / 8));

      bit_mask =
        si_shlqbi (bit_mask, si_from_uint ((LINE_SIZE - n_bytes_dirty) % 8));

      /* Rotate it around to the correct offset.  */
      bit_mask =
        si_rotqby (bit_mask,
                   si_from_uint (-1 * ((addr) ea & TAG_MASK) / 8));

      bit_mask =
        si_rotqbi (bit_mask,
                   si_from_uint (-1 * ((addr) ea & TAG_MASK) % 8));

      /* Update the dirty bits.  */
      si_stqx (si_or (si_lqx (si_from_ptr (entry), way), bit_mask),
               si_from_ptr (entry), way);
    }

  /* We've definitely found the right entry; set LRU (reserved) to 0,
     maintaining the LS flag (MSB).  */

  si_stqd (si_andc
           (si_lqd (si_from_ptr (entry), 48),
            si_and (equal, si_from_uint (~(LS_FLAG)))),
           si_from_ptr (entry), 48);

  return (void *)
    si_to_uint (si_a
                (si_orx
                 (si_and (si_lqd (si_from_ptr (entry), 32), equal)),
                 si_from_uint (((unsigned int) (addr) ea) & TAG_MASK)));

misshandler:
  equal = si_ceqi (etag_lo, 1);
  __cache_miss (ea, entry, (si_to_uint (si_clz (si_gbb (equal))) - 16) >> 2);
  goto missreturn;
}
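
/* Added sketch (hypothetical, kept out of the build with #if 0): how a
   store through an __ea pointer might lower to this API, assuming the
   access does not straddle a 128-byte line.  */
#if 0
static void
example_store (__ea int *p, int value)
{
  /* Map the EA to local store and mark sizeof (int) bytes dirty.  */
  int *local = (int *) __cache_fetch_dirty (p, sizeof (int));
  *local = value;               /* written back when the line is evicted */
}
#endif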

void *
__cache_fetch (__ea void *ea)
{
  return __cache_fetch_dirty (ea, 0);
}

void
__cache_touch (__ea void *ea __attribute__ ((unused)))
{
  /* NO-OP for now.  */
}
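
/* Added sketch (hypothetical, #if 0): the read path; the returned
   local-store pointer is only valid until the line is evicted.  */
#if 0
static int
example_load (__ea int *p)
{
  return *(int *) __cache_fetch (p);
}
#endif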

void __cache_flush (void) __attribute__ ((destructor));
void
__cache_flush (void)
{
  struct __cache_tag_array *entry = __cache_tag_array;
  unsigned int i;
  int j;

  /* Cycle through each cache entry and evict all used ways.  */

  for (i = 0; i < CACHE_LINES / WAYS; i++)
    {
      for (j = 0; j < WAYS; j++)
        if (!CHECK_EMPTY (entry, j))
          __cache_evict_entry (entry, j);

      entry++;
    }
}
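
/* Added sketch (hypothetical caller, #if 0): besides the exit-time
   destructor, code that publishes results to another processor can force
   writeback explicitly.  */
#if 0
static void
example_publish (__ea int *shared, int value)
{
  *(int *) __cache_fetch_dirty (shared, sizeof (int)) = value;
  __cache_flush ();     /* push every dirty line back to main storage */
}
#endif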
