1 |
737 |
jeremybenn |
/* Copyright (C) 2011 Free Software Foundation, Inc.
|
2 |
|
|
Contributed by Torvald Riegel <triegel@redhat.com>.
|
3 |
|
|
|
4 |
|
|
This file is part of the GNU Transactional Memory Library (libitm).
|
5 |
|
|
|
6 |
|
|
Libitm is free software; you can redistribute it and/or modify it
|
7 |
|
|
under the terms of the GNU General Public License as published by
|
8 |
|
|
the Free Software Foundation; either version 3 of the License, or
|
9 |
|
|
(at your option) any later version.
|
10 |
|
|
|
11 |
|
|
Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
|
12 |
|
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
13 |
|
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
14 |
|
|
more details.
|
15 |
|
|
|
16 |
|
|
Under Section 7 of GPL version 3, you are granted additional
|
17 |
|
|
permissions described in the GCC Runtime Library Exception, version
|
18 |
|
|
3.1, as published by the Free Software Foundation.
|
19 |
|
|
|
20 |
|
|
You should have received a copy of the GNU General Public License and
|
21 |
|
|
a copy of the GCC Runtime Library Exception along with this program;
|
22 |
|
|
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
23 |
|
|
<http://www.gnu.org/licenses/>. */
|
24 |
|
|
|
25 |
|
|
#include "libitm_i.h"
|
26 |
|
|
#include "futex.h"
|
27 |
|
|
#include <limits.h>
|
28 |
|
|
|
29 |
|
|
namespace GTM HIDDEN {
|
30 |
|
|
|
31 |
|
|
// Acquire a RW lock for reading.
|
32 |
|
|
|
33 |
|
|
void
|
34 |
|
|
gtm_rwlock::read_lock (gtm_thread *tx)
|
35 |
|
|
{
|
36 |
|
|
for (;;)
|
37 |
|
|
{
|
38 |
|
|
// Fast path: first announce our intent to read, then check for
|
39 |
|
|
// conflicting intents to write. The fence ensures that this happens
|
40 |
|
|
// in exactly this order.
|
41 |
|
|
tx->shared_state.store (0, memory_order_relaxed);
|
42 |
|
|
atomic_thread_fence (memory_order_seq_cst);
|
43 |
|
|
if (likely (writers.load (memory_order_relaxed) == 0))
|
44 |
|
|
return;
|
45 |
|
|
|
46 |
|
|
// There seems to be an active, waiting, or confirmed writer, so enter
|
47 |
|
|
// the futex-based slow path.
|
48 |
|
|
|
49 |
|
|
// Before waiting, we clear our read intent check whether there are any
|
50 |
|
|
// writers that might potentially wait for readers. If so, wake them.
|
51 |
|
|
// We need the barrier here for the same reason that we need it in
|
52 |
|
|
// read_unlock().
|
53 |
|
|
// TODO Potentially too many wake-ups. See comments in read_unlock().
|
54 |
|
|
tx->shared_state.store (-1, memory_order_relaxed);
|
55 |
|
|
atomic_thread_fence (memory_order_seq_cst);
|
56 |
|
|
if (writer_readers.load (memory_order_relaxed) > 0)
|
57 |
|
|
{
|
58 |
|
|
writer_readers.store (0, memory_order_relaxed);
|
59 |
|
|
futex_wake(&writer_readers, 1);
|
60 |
|
|
}
|
61 |
|
|
|
62 |
|
|
// Signal that there are waiting readers and wait until there is no
|
63 |
|
|
// writer anymore.
|
64 |
|
|
// TODO Spin here on writers for a while. Consider whether we woke
|
65 |
|
|
// any writers before?
|
66 |
|
|
while (writers.load (memory_order_relaxed))
|
67 |
|
|
{
|
68 |
|
|
// An active writer. Wait until it has finished. To avoid lost
|
69 |
|
|
// wake-ups, we need to use Dekker-like synchronization.
|
70 |
|
|
// Note that we cannot reset readers to zero when we see that there
|
71 |
|
|
// are no writers anymore after the barrier because this pending
|
72 |
|
|
// store could then lead to lost wake-ups at other readers.
|
73 |
|
|
readers.store (1, memory_order_relaxed);
|
74 |
|
|
atomic_thread_fence (memory_order_seq_cst);
|
75 |
|
|
if (writers.load (memory_order_relaxed))
|
76 |
|
|
futex_wait(&readers, 1);
|
77 |
|
|
}
|
78 |
|
|
|
79 |
|
|
// And we try again to acquire a read lock.
|
80 |
|
|
}
|
81 |
|
|
}
|
82 |
|
|
|
83 |
|
|
|
84 |
|
|
// Acquire a RW lock for writing. Generic version that also works for
|
85 |
|
|
// upgrades.
|
86 |
|
|
// Note that an upgrade might fail (and thus waste previous work done during
|
87 |
|
|
// this transaction) if there is another thread that tried to go into serial
|
88 |
|
|
// mode earlier (i.e., upgrades do not have higher priority than pure writers).
|
89 |
|
|
// However, this seems rare enough to not consider it further as we need both
|
90 |
|
|
// a non-upgrade writer and a writer to happen to switch to serial mode
|
91 |
|
|
// concurrently. If we'd want to handle this, a writer waiting for readers
|
92 |
|
|
// would have to coordinate with later arriving upgrades and hand over the
|
93 |
|
|
// lock to them, including the the reader-waiting state. We can try to support
|
94 |
|
|
// this if this will actually happen often enough in real workloads.
|
95 |
|
|
|
96 |
|
|
bool
|
97 |
|
|
gtm_rwlock::write_lock_generic (gtm_thread *tx)
|
98 |
|
|
{
|
99 |
|
|
// Try to acquire the write lock.
|
100 |
|
|
int w = 0;
|
101 |
|
|
if (unlikely (!writers.compare_exchange_strong (w, 1)))
|
102 |
|
|
{
|
103 |
|
|
// If this is an upgrade, we must not wait for other writers or
|
104 |
|
|
// upgrades.
|
105 |
|
|
if (tx != 0)
|
106 |
|
|
return false;
|
107 |
|
|
|
108 |
|
|
// There is already a writer. If there are no other waiting writers,
|
109 |
|
|
// switch to contended mode. We need seq_cst memory order to make the
|
110 |
|
|
// Dekker-style synchronization work.
|
111 |
|
|
if (w != 2)
|
112 |
|
|
w = writers.exchange (2);
|
113 |
|
|
while (w != 0)
|
114 |
|
|
{
|
115 |
|
|
futex_wait(&writers, 2);
|
116 |
|
|
w = writers.exchange (2);
|
117 |
|
|
}
|
118 |
|
|
}
|
119 |
|
|
|
120 |
|
|
// We have acquired the writer side of the R/W lock. Now wait for any
|
121 |
|
|
// readers that might still be active.
|
122 |
|
|
// We don't need an extra barrier here because the CAS and the xchg
|
123 |
|
|
// operations have full barrier semantics already.
|
124 |
|
|
// TODO In the worst case, this requires one wait/wake pair for each
|
125 |
|
|
// active reader. Reduce this!
|
126 |
|
|
for (gtm_thread *it = gtm_thread::list_of_threads; it != 0;
|
127 |
|
|
it = it->next_thread)
|
128 |
|
|
{
|
129 |
|
|
if (it == tx)
|
130 |
|
|
continue;
|
131 |
|
|
// Use a loop here to check reader flags again after waiting.
|
132 |
|
|
while (it->shared_state.load (memory_order_relaxed)
|
133 |
|
|
!= ~(typeof it->shared_state)0)
|
134 |
|
|
{
|
135 |
|
|
// An active reader. Wait until it has finished. To avoid lost
|
136 |
|
|
// wake-ups, we need to use Dekker-like synchronization.
|
137 |
|
|
// Note that we can reset writer_readers to zero when we see after
|
138 |
|
|
// the barrier that the reader has finished in the meantime;
|
139 |
|
|
// however, this is only possible because we are the only writer.
|
140 |
|
|
// TODO Spin for a while on this reader flag.
|
141 |
|
|
writer_readers.store (1, memory_order_relaxed);
|
142 |
|
|
atomic_thread_fence (memory_order_seq_cst);
|
143 |
|
|
if (it->shared_state.load (memory_order_relaxed)
|
144 |
|
|
!= ~(typeof it->shared_state)0)
|
145 |
|
|
futex_wait(&writer_readers, 1);
|
146 |
|
|
else
|
147 |
|
|
writer_readers.store (0, memory_order_relaxed);
|
148 |
|
|
}
|
149 |
|
|
}
|
150 |
|
|
|
151 |
|
|
return true;
|
152 |
|
|
}
|
153 |
|
|
|
154 |
|
|
// Acquire a RW lock for writing.
|
155 |
|
|
|
156 |
|
|
void
|
157 |
|
|
gtm_rwlock::write_lock ()
|
158 |
|
|
{
|
159 |
|
|
write_lock_generic (0);
|
160 |
|
|
}
|
161 |
|
|
|
162 |
|
|
|
163 |
|
|
// Upgrade a RW lock that has been locked for reading to a writing lock.
|
164 |
|
|
// Do this without possibility of another writer incoming. Return false
|
165 |
|
|
// if this attempt fails (i.e. another thread also upgraded).
|
166 |
|
|
|
167 |
|
|
bool
|
168 |
|
|
gtm_rwlock::write_upgrade (gtm_thread *tx)
|
169 |
|
|
{
|
170 |
|
|
return write_lock_generic (tx);
|
171 |
|
|
}
|
172 |
|
|
|
173 |
|
|
|
174 |
|
|
// Has to be called iff the previous upgrade was successful and after it is
|
175 |
|
|
// safe for the transaction to not be marked as a reader anymore.
|
176 |
|
|
|
177 |
|
|
void
|
178 |
|
|
gtm_rwlock::write_upgrade_finish (gtm_thread *tx)
|
179 |
|
|
{
|
180 |
|
|
// We are not a reader anymore. This is only safe to do after we have
|
181 |
|
|
// acquired the writer lock.
|
182 |
|
|
tx->shared_state.store (-1, memory_order_release);
|
183 |
|
|
}
|
184 |
|
|
|
185 |
|
|
|
186 |
|
|
// Release a RW lock from reading.
|
187 |
|
|
|
188 |
|
|
void
|
189 |
|
|
gtm_rwlock::read_unlock (gtm_thread *tx)
|
190 |
|
|
{
|
191 |
|
|
// We only need release memory order here because of privatization safety
|
192 |
|
|
// (this ensures that marking the transaction as inactive happens after
|
193 |
|
|
// any prior data accesses by this transaction, and that neither the
|
194 |
|
|
// compiler nor the hardware order this store earlier).
|
195 |
|
|
// ??? We might be able to avoid this release here if the compiler can't
|
196 |
|
|
// merge the release fence with the subsequent seq_cst fence.
|
197 |
|
|
tx->shared_state.store (-1, memory_order_release);
|
198 |
|
|
|
199 |
|
|
// If there is a writer waiting for readers, wake it up. We need the fence
|
200 |
|
|
// to avoid lost wake-ups. Furthermore, the privatization safety
|
201 |
|
|
// implementation in gtm_thread::try_commit() relies on the existence of
|
202 |
|
|
// this seq_cst fence.
|
203 |
|
|
// ??? We might not be the last active reader, so the wake-up might happen
|
204 |
|
|
// too early. How do we avoid this without slowing down readers too much?
|
205 |
|
|
// Each reader could scan the list of txns for other active readers but
|
206 |
|
|
// this can result in many cache misses. Use combining instead?
|
207 |
|
|
// TODO Sends out one wake-up for each reader in the worst case.
|
208 |
|
|
atomic_thread_fence (memory_order_seq_cst);
|
209 |
|
|
if (unlikely (writer_readers.load (memory_order_relaxed) > 0))
|
210 |
|
|
{
|
211 |
|
|
// No additional barrier needed here (see write_unlock()).
|
212 |
|
|
writer_readers.store (0, memory_order_relaxed);
|
213 |
|
|
futex_wake(&writer_readers, 1);
|
214 |
|
|
}
|
215 |
|
|
}
|
216 |
|
|
|
217 |
|
|
|
218 |
|
|
// Release a RW lock from writing.
|
219 |
|
|
|
220 |
|
|
void
|
221 |
|
|
gtm_rwlock::write_unlock ()
|
222 |
|
|
{
|
223 |
|
|
// This needs to have seq_cst memory order.
|
224 |
|
|
if (writers.fetch_sub (1) == 2)
|
225 |
|
|
{
|
226 |
|
|
// There might be waiting writers, so wake them.
|
227 |
|
|
writers.store (0, memory_order_relaxed);
|
228 |
|
|
if (futex_wake(&writers, 1) == 0)
|
229 |
|
|
{
|
230 |
|
|
// If we did not wake any waiting writers, we might indeed be the
|
231 |
|
|
// last writer (this can happen because write_lock_generic()
|
232 |
|
|
// exchanges 0 or 1 to 2 and thus might go to contended mode even if
|
233 |
|
|
// no other thread holds the write lock currently). Therefore, we
|
234 |
|
|
// have to wake up readers here as well. Execute a barrier after
|
235 |
|
|
// the previous relaxed reset of writers (Dekker-style), and fall
|
236 |
|
|
// through to the normal reader wake-up code.
|
237 |
|
|
atomic_thread_fence (memory_order_seq_cst);
|
238 |
|
|
}
|
239 |
|
|
else
|
240 |
|
|
return;
|
241 |
|
|
}
|
242 |
|
|
// No waiting writers, so wake up all waiting readers.
|
243 |
|
|
// Because the fetch_and_sub is a full barrier already, we don't need
|
244 |
|
|
// another barrier here (as in read_unlock()).
|
245 |
|
|
if (readers.load (memory_order_relaxed) > 0)
|
246 |
|
|
{
|
247 |
|
|
// No additional barrier needed here. The previous load must be in
|
248 |
|
|
// modification order because of the coherency constraints. Late stores
|
249 |
|
|
// by a reader are not a problem because readers do Dekker-style
|
250 |
|
|
// synchronization on writers.
|
251 |
|
|
readers.store (0, memory_order_relaxed);
|
252 |
|
|
futex_wake(&readers, INT_MAX);
|
253 |
|
|
}
|
254 |
|
|
}
|
255 |
|
|
|
256 |
|
|
} // namespace GTM
|