OpenCores
URL https://opencores.org/ocsvn/openrisc_2011-10-31/openrisc_2011-10-31/trunk

Subversion Repositories openrisc_2011-10-31

[/] [openrisc/] [trunk/] [gnu-src/] [gcc-4.5.1/] [libgomp/] [iter_ull.c] - Blame information for rev 497

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 273 jeremybenn
/* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
2
   Contributed by Richard Henderson <rth@redhat.com>.
3
 
4
   This file is part of the GNU OpenMP Library (libgomp).
5
 
6
   Libgomp is free software; you can redistribute it and/or modify it
7
   under the terms of the GNU General Public License as published by
8
   the Free Software Foundation; either version 3, or (at your option)
9
   any later version.
10
 
11
   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
12
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
14
   more details.
15
 
16
   Under Section 7 of GPL version 3, you are granted additional
17
   permissions described in the GCC Runtime Library Exception, version
18
   3.1, as published by the Free Software Foundation.
19
 
20
   You should have received a copy of the GNU General Public License and
21
   a copy of the GCC Runtime Library Exception along with this program;
22
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23
   <http://www.gnu.org/licenses/>.  */
24
 
25
/* This file contains routines for managing work-share iteration for
   loops whose iteration variable has type unsigned long long.  */
27
 
28
#include "libgomp.h"
29
#include <stdlib.h>
30
 
31
/* Shorthand for unsigned long long, the type used for the iteration
   bounds and counts handled by the routines in this file.  */
typedef unsigned long long gomp_ull;
32
 
33
/* This function implements the STATIC scheduling method.  The caller should
34
   iterate *pstart <= x < *pend.  Return zero if there are more iterations
35
   to perform; nonzero if not.  Return less than 0 if this thread had
36
   received the absolutely last iteration.  */
37
 
38
int
39
gomp_iter_ull_static_next (gomp_ull *pstart, gomp_ull *pend)
40
{
41
  struct gomp_thread *thr = gomp_thread ();
42
  struct gomp_team *team = thr->ts.team;
43
  struct gomp_work_share *ws = thr->ts.work_share;
44
  unsigned long nthreads = team ? team->nthreads : 1;
45
 
46
  if (thr->ts.static_trip == -1)
47
    return -1;
48
 
49
  /* Quick test for degenerate teams and orphaned constructs.  */
50
  if (nthreads == 1)
51
    {
52
      *pstart = ws->next_ull;
53
      *pend = ws->end_ull;
54
      thr->ts.static_trip = -1;
55
      return ws->next_ull == ws->end_ull;
56
    }
57
 
58
  /* We interpret chunk_size zero as "unspecified", which means that we
59
     should break up the iterations such that each thread makes only one
60
     trip through the outer loop.  */
61
  if (ws->chunk_size_ull == 0)
62
    {
63
      gomp_ull n, q, i, s0, e0, s, e;
64
 
65
      if (thr->ts.static_trip > 0)
66
        return 1;
67
 
68
      /* Compute the total number of iterations.  */
69
      if (__builtin_expect (ws->mode, 0) == 0)
70
        n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
71
      else
72
        n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
73
      i = thr->ts.team_id;
74
 
75
      /* Compute the "zero-based" start and end points.  That is, as
76
         if the loop began at zero and incremented by one.  */
77
      q = n / nthreads;
78
      q += (q * nthreads != n);
79
      s0 = q * i;
80
      e0 = s0 + q;
81
      if (e0 > n)
82
        e0 = n;
83
 
84
      /* Notice when no iterations allocated for this thread.  */
85
      if (s0 >= e0)
86
        {
87
          thr->ts.static_trip = 1;
88
          return 1;
89
        }
90
 
91
      /* Transform these to the actual start and end numbers.  */
92
      s = s0 * ws->incr_ull + ws->next_ull;
93
      e = e0 * ws->incr_ull + ws->next_ull;
94
 
95
      *pstart = s;
96
      *pend = e;
97
      thr->ts.static_trip = (e0 == n ? -1 : 1);
98
      return 0;
99
    }
100
  else
101
    {
102
      gomp_ull n, s0, e0, i, c, s, e;
103
 
104
      /* Otherwise, each thread gets exactly chunk_size iterations
105
         (if available) each time through the loop.  */
106
 
107
      if (__builtin_expect (ws->mode, 0) == 0)
108
        n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
109
      else
110
        n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
111
      i = thr->ts.team_id;
112
      c = ws->chunk_size_ull;
113
 
114
      /* Initial guess is a C sized chunk positioned nthreads iterations
115
         in, offset by our thread number.  */
116
      s0 = (thr->ts.static_trip * (gomp_ull) nthreads + i) * c;
117
      e0 = s0 + c;
118
 
119
      /* Detect overflow.  */
120
      if (s0 >= n)
121
        return 1;
122
      if (e0 > n)
123
        e0 = n;
124
 
125
      /* Transform these to the actual start and end numbers.  */
126
      s = s0 * ws->incr_ull + ws->next_ull;
127
      e = e0 * ws->incr_ull + ws->next_ull;
128
 
129
      *pstart = s;
130
      *pend = e;
131
 
132
      if (e0 == n)
133
        thr->ts.static_trip = -1;
134
      else
135
        thr->ts.static_trip++;
136
      return 0;
137
    }
138
}
139
 
140
 
141
/* This function implements the DYNAMIC scheduling method.  Arguments are
142
   as for gomp_iter_ull_static_next.  This function must be called with
143
   ws->lock held.  */
144
 
145
bool
146
gomp_iter_ull_dynamic_next_locked (gomp_ull *pstart, gomp_ull *pend)
147
{
148
  struct gomp_thread *thr = gomp_thread ();
149
  struct gomp_work_share *ws = thr->ts.work_share;
150
  gomp_ull start, end, chunk, left;
151
 
152
  start = ws->next_ull;
153
  if (start == ws->end_ull)
154
    return false;
155
 
156
  chunk = ws->chunk_size_ull;
157
  left = ws->end_ull - start;
158
  if (__builtin_expect (ws->mode & 2, 0))
159
    {
160
      if (chunk < left)
161
        chunk = left;
162
    }
163
  else
164
    {
165
      if (chunk > left)
166
        chunk = left;
167
    }
168
  end = start + chunk;
169
 
170
  ws->next_ull = end;
171
  *pstart = start;
172
  *pend = end;
173
  return true;
174
}
175
 
176
 
177
#if defined HAVE_SYNC_BUILTINS && defined __LP64__
178
/* Similar, but doesn't require the lock held, and uses compare-and-swap
179
   instead.  Note that the only memory value that changes is ws->next_ull.  */
180
 
181
bool
182
gomp_iter_ull_dynamic_next (gomp_ull *pstart, gomp_ull *pend)
183
{
184
  struct gomp_thread *thr = gomp_thread ();
185
  struct gomp_work_share *ws = thr->ts.work_share;
186
  gomp_ull start, end, nend, chunk;
187
 
188
  end = ws->end_ull;
189
  chunk = ws->chunk_size_ull;
190
 
191
  if (__builtin_expect (ws->mode & 1, 1))
192
    {
193
      gomp_ull tmp = __sync_fetch_and_add (&ws->next_ull, chunk);
194
      if (__builtin_expect (ws->mode & 2, 0) == 0)
195
        {
196
          if (tmp >= end)
197
            return false;
198
          nend = tmp + chunk;
199
          if (nend > end)
200
            nend = end;
201
          *pstart = tmp;
202
          *pend = nend;
203
          return true;
204
        }
205
      else
206
        {
207
          if (tmp <= end)
208
            return false;
209
          nend = tmp + chunk;
210
          if (nend < end)
211
            nend = end;
212
          *pstart = tmp;
213
          *pend = nend;
214
          return true;
215
        }
216
    }
217
 
218
  start = ws->next_ull;
219
  while (1)
220
    {
221
      gomp_ull left = end - start;
222
      gomp_ull tmp;
223
 
224
      if (start == end)
225
        return false;
226
 
227
      if (__builtin_expect (ws->mode & 2, 0))
228
        {
229
          if (chunk < left)
230
            chunk = left;
231
        }
232
      else
233
        {
234
          if (chunk > left)
235
            chunk = left;
236
        }
237
      nend = start + chunk;
238
 
239
      tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
240
      if (__builtin_expect (tmp == start, 1))
241
        break;
242
 
243
      start = tmp;
244
    }
245
 
246
  *pstart = start;
247
  *pend = nend;
248
  return true;
249
}
250
#endif /* HAVE_SYNC_BUILTINS && __LP64__ */
251
 
252
 
253
/* This function implements the GUIDED scheduling method.  Arguments are
254
   as for gomp_iter_ull_static_next.  This function must be called with the
255
   work share lock held.  */
256
 
257
bool
258
gomp_iter_ull_guided_next_locked (gomp_ull *pstart, gomp_ull *pend)
259
{
260
  struct gomp_thread *thr = gomp_thread ();
261
  struct gomp_work_share *ws = thr->ts.work_share;
262
  struct gomp_team *team = thr->ts.team;
263
  gomp_ull nthreads = team ? team->nthreads : 1;
264
  gomp_ull n, q;
265
  gomp_ull start, end;
266
 
267
  if (ws->next_ull == ws->end_ull)
268
    return false;
269
 
270
  start = ws->next_ull;
271
  if (__builtin_expect (ws->mode, 0) == 0)
272
    n = (ws->end_ull - start) / ws->incr_ull;
273
  else
274
    n = (start - ws->end_ull) / -ws->incr_ull;
275
  q = (n + nthreads - 1) / nthreads;
276
 
277
  if (q < ws->chunk_size_ull)
278
    q = ws->chunk_size_ull;
279
  if (q <= n)
280
    end = start + q * ws->incr_ull;
281
  else
282
    end = ws->end_ull;
283
 
284
  ws->next_ull = end;
285
  *pstart = start;
286
  *pend = end;
287
  return true;
288
}
289
 
290
#if defined HAVE_SYNC_BUILTINS && defined __LP64__
291
/* Similar, but doesn't require the lock held, and uses compare-and-swap
292
   instead.  Note that the only memory value that changes is ws->next_ull.  */
293
 
294
bool
295
gomp_iter_ull_guided_next (gomp_ull *pstart, gomp_ull *pend)
296
{
297
  struct gomp_thread *thr = gomp_thread ();
298
  struct gomp_work_share *ws = thr->ts.work_share;
299
  struct gomp_team *team = thr->ts.team;
300
  gomp_ull nthreads = team ? team->nthreads : 1;
301
  gomp_ull start, end, nend, incr;
302
  gomp_ull chunk_size;
303
 
304
  start = ws->next_ull;
305
  end = ws->end_ull;
306
  incr = ws->incr_ull;
307
  chunk_size = ws->chunk_size_ull;
308
 
309
  while (1)
310
    {
311
      gomp_ull n, q;
312
      gomp_ull tmp;
313
 
314
      if (start == end)
315
        return false;
316
 
317
      if (__builtin_expect (ws->mode, 0) == 0)
318
        n = (end - start) / incr;
319
      else
320
        n = (start - end) / -incr;
321
      q = (n + nthreads - 1) / nthreads;
322
 
323
      if (q < chunk_size)
324
        q = chunk_size;
325
      if (__builtin_expect (q <= n, 1))
326
        nend = start + q * incr;
327
      else
328
        nend = end;
329
 
330
      tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
331
      if (__builtin_expect (tmp == start, 1))
332
        break;
333
 
334
      start = tmp;
335
    }
336
 
337
  *pstart = start;
338
  *pend = nend;
339
  return true;
340
}
341
#endif /* HAVE_SYNC_BUILTINS && __LP64__ */

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.