1 |
734 |
jeremybenn |
/* TILE atomics.
|
2 |
|
|
Copyright (C) 2011, 2012
|
3 |
|
|
Free Software Foundation, Inc.
|
4 |
|
|
Contributed by Walter Lee (walt@tilera.com)
|
5 |
|
|
|
6 |
|
|
This file is free software; you can redistribute it and/or modify it
|
7 |
|
|
under the terms of the GNU General Public License as published by the
|
8 |
|
|
Free Software Foundation; either version 3, or (at your option) any
|
9 |
|
|
later version.
|
10 |
|
|
|
11 |
|
|
This file is distributed in the hope that it will be useful, but
|
12 |
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
13 |
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14 |
|
|
General Public License for more details.
|
15 |
|
|
|
16 |
|
|
Under Section 7 of GPL version 3, you are granted additional
|
17 |
|
|
permissions described in the GCC Runtime Library Exception, version
|
18 |
|
|
3.1, as published by the Free Software Foundation.
|
19 |
|
|
|
20 |
|
|
You should have received a copy of the GNU General Public License and
|
21 |
|
|
a copy of the GCC Runtime Library Exception along with this program;
|
22 |
|
|
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
23 |
|
|
<http://www.gnu.org/licenses/>. */
|
24 |
|
|
|
25 |
|
|
#include "system.h"
|
26 |
|
|
#include "coretypes.h"
|
27 |
|
|
#include "atomic.h"
|
28 |
|
|
|
29 |
|
|
/* This code should be inlined by the compiler, but for now support
|
30 |
|
|
it as out-of-line methods in libgcc. */
|
31 |
|
|
|
32 |
|
|
static void
|
33 |
|
|
pre_atomic_barrier (int model)
|
34 |
|
|
{
|
35 |
|
|
switch ((enum memmodel) model)
|
36 |
|
|
{
|
37 |
|
|
case MEMMODEL_RELEASE:
|
38 |
|
|
case MEMMODEL_ACQ_REL:
|
39 |
|
|
case MEMMODEL_SEQ_CST:
|
40 |
|
|
__atomic_thread_fence (model);
|
41 |
|
|
break;
|
42 |
|
|
default:
|
43 |
|
|
break;
|
44 |
|
|
}
|
45 |
|
|
return;
|
46 |
|
|
}
|
47 |
|
|
|
48 |
|
|
static void
|
49 |
|
|
post_atomic_barrier (int model)
|
50 |
|
|
{
|
51 |
|
|
switch ((enum memmodel) model)
|
52 |
|
|
{
|
53 |
|
|
case MEMMODEL_ACQUIRE:
|
54 |
|
|
case MEMMODEL_ACQ_REL:
|
55 |
|
|
case MEMMODEL_SEQ_CST:
|
56 |
|
|
__atomic_thread_fence (model);
|
57 |
|
|
break;
|
58 |
|
|
default:
|
59 |
|
|
break;
|
60 |
|
|
}
|
61 |
|
|
return;
|
62 |
|
|
}
|
63 |
|
|
|
64 |
|
|
/* Tags a declaration (here: parameters the out-of-line entry points
   accept but ignore) so the compiler does not warn about it being
   unused.  */
#define __unused __attribute__ ((__unused__))
|
65 |
|
|
|
66 |
|
|
/* atomic.h does not implement xor or nand, so supply them here on top
   of its __atomic_update_cmpxchg helper.  The last argument is the
   replacement-value expression; it is written in terms of __old and
   __value, which are presumably bound by that helper (its definition
   is not in this file).  */
#define atomic_xor(mem, mask) __atomic_update_cmpxchg (mem, mask, __old ^ __value)
#define atomic_nand(mem, mask) __atomic_update_cmpxchg (mem, mask, ~(__old & __value))
|
71 |
|
|
|
72 |
|
|
/* Define __atomic_fetch_OPNAME_SIZE (TYPE *p, TYPE i, int model).
   The generated function runs atomic_OPNAME (p, i) between the
   barriers that MODEL calls for and returns whatever atomic_OPNAME
   returned.  */
#define __atomic_fetch_and_do(type, size, opname)                       \
type                                                                    \
__atomic_fetch_##opname##_##size (type *p, type i, int model)           \
{                                                                       \
  type fetched;                                                         \
  pre_atomic_barrier (model);                                           \
  fetched = atomic_##opname (p, i);                                     \
  post_atomic_barrier (model);                                          \
  return fetched;                                                       \
}

/* Four-byte entry points.  */
__atomic_fetch_and_do (int, 4, add)
__atomic_fetch_and_do (int, 4, sub)
__atomic_fetch_and_do (int, 4, or)
__atomic_fetch_and_do (int, 4, and)
__atomic_fetch_and_do (int, 4, xor)
__atomic_fetch_and_do (int, 4, nand)

/* Eight-byte entry points.  */
__atomic_fetch_and_do (long long, 8, add)
__atomic_fetch_and_do (long long, 8, sub)
__atomic_fetch_and_do (long long, 8, or)
__atomic_fetch_and_do (long long, 8, and)
__atomic_fetch_and_do (long long, 8, xor)
__atomic_fetch_and_do (long long, 8, nand)
|
94 |
|
|
/* Define __atomic_OPNAME_fetch_SIZE (TYPE *p, TYPE i, int model).
   The generated function runs atomic_OPNAME (p, i) between the
   barriers that MODEL calls for, then returns the *updated* value.

   atomic_OPNAME only hands back one value (treated here, as the
   original code did, as the contents of *p before the update), so the
   new value is recomputed locally.  RESULT is the expression that
   does so; it may refer to `oldval' (what atomic_OPNAME returned) and
   to the operand `i', exactly like the expressions passed to the
   subword variants later in this file.

   Taking a whole expression rather than a single binary operator is
   what lets xor and nand be expressed correctly: previously xor was
   recomputed with `|' and nand with a bare `&' (missing the
   complement), so both returned the wrong value.  */
#define __atomic_do_and_fetch(type, size, opname, result)               \
type                                                                    \
__atomic_##opname##_fetch_##size (type *p, type i, int model)           \
{                                                                       \
  pre_atomic_barrier (model);                                           \
  type oldval = atomic_##opname (p, i);                                 \
  type rv = (result);                                                   \
  post_atomic_barrier (model);                                          \
  return rv;                                                            \
}

__atomic_do_and_fetch (int, 4, add, oldval + i)
__atomic_do_and_fetch (int, 4, sub, oldval - i)
__atomic_do_and_fetch (int, 4, or, oldval | i)
__atomic_do_and_fetch (int, 4, and, oldval & i)
__atomic_do_and_fetch (int, 4, xor, oldval ^ i)
__atomic_do_and_fetch (int, 4, nand, ~(oldval & i))
__atomic_do_and_fetch (long long, 8, add, oldval + i)
__atomic_do_and_fetch (long long, 8, sub, oldval - i)
__atomic_do_and_fetch (long long, 8, or, oldval | i)
__atomic_do_and_fetch (long long, 8, and, oldval & i)
__atomic_do_and_fetch (long long, 8, xor, oldval ^ i)
__atomic_do_and_fetch (long long, 8, nand, ~(oldval & i))
|
115 |
|
|
#define __atomic_exchange_methods(type, size) \
|
116 |
|
|
bool \
|
117 |
|
|
__atomic_compare_exchange_##size(volatile type* ptr, type* oldvalp, \
|
118 |
|
|
type newval, bool weak __unused, \
|
119 |
|
|
int models, int modelf __unused) \
|
120 |
|
|
{ \
|
121 |
|
|
type oldval = *oldvalp; \
|
122 |
|
|
pre_atomic_barrier(models); \
|
123 |
|
|
type retval = atomic_val_compare_and_exchange(ptr, oldval, newval); \
|
124 |
|
|
post_atomic_barrier(models); \
|
125 |
|
|
bool success = (retval == oldval); \
|
126 |
|
|
*oldvalp = retval; \
|
127 |
|
|
return success; \
|
128 |
|
|
} \
|
129 |
|
|
\
|
130 |
|
|
type \
|
131 |
|
|
__atomic_exchange_##size(volatile type* ptr, type val, int model) \
|
132 |
|
|
{ \
|
133 |
|
|
pre_atomic_barrier(model); \
|
134 |
|
|
type retval = atomic_exchange(ptr, val); \
|
135 |
|
|
post_atomic_barrier(model); \
|
136 |
|
|
return retval; \
|
137 |
|
|
}
|
138 |
|
|
__atomic_exchange_methods (int, 4)
|
139 |
|
|
__atomic_exchange_methods (long long, 8)
|
140 |
|
|
|
141 |
|
|
/* Subword methods require the same approach for both TILEPro and
|
142 |
|
|
TILE-Gx. We load the background data for the word, insert the
|
143 |
|
|
desired subword piece, then compare-and-exchange it into place. */
|
144 |
|
|
#define u8 unsigned char
|
145 |
|
|
#define u16 unsigned short
|
146 |
|
|
#define __atomic_subword_cmpxchg(type, size) \
|
147 |
|
|
\
|
148 |
|
|
bool \
|
149 |
|
|
__atomic_compare_exchange_##size(volatile type* ptr, type* guess, \
|
150 |
|
|
type val, bool weak __unused, int models, \
|
151 |
|
|
int modelf __unused) \
|
152 |
|
|
{ \
|
153 |
|
|
pre_atomic_barrier(models); \
|
154 |
|
|
unsigned int *p = (unsigned int *)((unsigned long)ptr & ~3UL); \
|
155 |
|
|
const int shift = ((unsigned long)ptr & 3UL) * 8; \
|
156 |
|
|
const unsigned int valmask = (1 << (sizeof(type) * 8)) - 1; \
|
157 |
|
|
const unsigned int bgmask = ~(valmask << shift); \
|
158 |
|
|
unsigned int oldword = *p; \
|
159 |
|
|
type oldval = (oldword >> shift) & valmask; \
|
160 |
|
|
if (__builtin_expect((oldval == *guess), 1)) { \
|
161 |
|
|
unsigned int word = (oldword & bgmask) | ((val & valmask) << shift); \
|
162 |
|
|
oldword = atomic_val_compare_and_exchange(p, oldword, word); \
|
163 |
|
|
oldval = (oldword >> shift) & valmask; \
|
164 |
|
|
} \
|
165 |
|
|
post_atomic_barrier(models); \
|
166 |
|
|
bool success = (oldval == *guess); \
|
167 |
|
|
*guess = oldval; \
|
168 |
|
|
return success; \
|
169 |
|
|
}
|
170 |
|
|
__atomic_subword_cmpxchg (u8, 1)
|
171 |
|
|
__atomic_subword_cmpxchg (u16, 2)
|
172 |
|
|
/* For the atomic-update subword methods, we use the same approach as
|
173 |
|
|
above, but we retry until we succeed if the compare-and-exchange
|
174 |
|
|
fails. */
|
175 |
|
|
/* Skeleton shared by the subword read-modify-write entry points.

   PROTO is the function header and must declare a pointer named
   `ptr'.  TOP is pasted at the start of the body and BOTTOM at the
   end.  EXPR computes the replacement subword and may refer to
   `oldval' (the subword just read); BOTTOM may refer to `oldval' and
   to `val' (the replacement that was stored).

   The aligned 32-bit word containing *ptr is read, the replacement
   subword is merged into it, and the result is installed with
   atomic_val_compare_and_exchange.  If that call returns a word other
   than the one the merge was based on, the whole step is repeated
   starting from the returned word.  */
#define __atomic_subword(type, proto, top, expr, bottom)                \
proto                                                                   \
{                                                                       \
  top                                                                   \
  unsigned int *p = (unsigned int *)((unsigned long)ptr & ~3UL);        \
  const int shift = ((unsigned long)ptr & 3UL) * 8;                     \
  const unsigned int valmask = (1 << (sizeof(type) * 8)) - 1;           \
  const unsigned int bgmask = ~(valmask << shift);                      \
  unsigned int oldword, seen = *p;                                      \
  type val, oldval;                                                     \
  for (;;) {                                                            \
    oldword = seen;                                                     \
    oldval = (oldword >> shift) & valmask;                              \
    val = expr;                                                         \
    unsigned int newword = (oldword & bgmask) | ((val & valmask) << shift); \
    seen = atomic_val_compare_and_exchange(p, oldword, newword);        \
    if (__builtin_expect(seen == oldword, 1))                           \
      break;                                                            \
  }                                                                     \
  bottom                                                                \
}
|
194 |
|
|
#define __atomic_subword_fetch(type, funcname, expr, retval) \
|
195 |
|
|
__atomic_subword(type, \
|
196 |
|
|
type __atomic_ ## funcname(volatile type *ptr, type i, int model), \
|
197 |
|
|
pre_atomic_barrier(model);, \
|
198 |
|
|
expr, \
|
199 |
|
|
post_atomic_barrier(model); return retval;)
|
200 |
|
|
__atomic_subword_fetch (u8, fetch_add_1, oldval + i, oldval)
|
201 |
|
|
__atomic_subword_fetch (u8, fetch_sub_1, oldval - i, oldval)
|
202 |
|
|
__atomic_subword_fetch (u8, fetch_or_1, oldval | i, oldval)
|
203 |
|
|
__atomic_subword_fetch (u8, fetch_and_1, oldval & i, oldval)
|
204 |
|
|
__atomic_subword_fetch (u8, fetch_xor_1, oldval ^ i, oldval)
|
205 |
|
|
__atomic_subword_fetch (u8, fetch_nand_1, ~(oldval & i), oldval)
|
206 |
|
|
__atomic_subword_fetch (u16, fetch_add_2, oldval + i, oldval)
|
207 |
|
|
__atomic_subword_fetch (u16, fetch_sub_2, oldval - i, oldval)
|
208 |
|
|
__atomic_subword_fetch (u16, fetch_or_2, oldval | i, oldval)
|
209 |
|
|
__atomic_subword_fetch (u16, fetch_and_2, oldval & i, oldval)
|
210 |
|
|
__atomic_subword_fetch (u16, fetch_xor_2, oldval ^ i, oldval)
|
211 |
|
|
__atomic_subword_fetch (u16, fetch_nand_2, ~(oldval & i), oldval)
|
212 |
|
|
__atomic_subword_fetch (u8, add_fetch_1, oldval + i, val)
|
213 |
|
|
__atomic_subword_fetch (u8, sub_fetch_1, oldval - i, val)
|
214 |
|
|
__atomic_subword_fetch (u8, or_fetch_1, oldval | i, val)
|
215 |
|
|
__atomic_subword_fetch (u8, and_fetch_1, oldval & i, val)
|
216 |
|
|
__atomic_subword_fetch (u8, xor_fetch_1, oldval ^ i, val)
|
217 |
|
|
__atomic_subword_fetch (u8, nand_fetch_1, ~(oldval & i), val)
|
218 |
|
|
__atomic_subword_fetch (u16, add_fetch_2, oldval + i, val)
|
219 |
|
|
__atomic_subword_fetch (u16, sub_fetch_2, oldval - i, val)
|
220 |
|
|
__atomic_subword_fetch (u16, or_fetch_2, oldval | i, val)
|
221 |
|
|
__atomic_subword_fetch (u16, and_fetch_2, oldval & i, val)
|
222 |
|
|
__atomic_subword_fetch (u16, xor_fetch_2, oldval ^ i, val)
|
223 |
|
|
__atomic_subword_fetch (u16, nand_fetch_2, ~(oldval & i), val)
|
224 |
|
|
#define __atomic_subword_lock(type, size) \
|
225 |
|
|
\
|
226 |
|
|
__atomic_subword(type, \
|
227 |
|
|
type __atomic_exchange_##size(volatile type* ptr, type nval, int model), \
|
228 |
|
|
pre_atomic_barrier(model);, \
|
229 |
|
|
nval, \
|
230 |
|
|
post_atomic_barrier(model); return oldval;)
|
231 |
|
|
__atomic_subword_lock (u8, 1)
|
232 |
|
|
__atomic_subword_lock (u16, 2)
|