OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgomp/] [iter_ull.c] - Blame information for rev 737

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 735 jeremybenn
/* Copyright (C) 2005, 2008, 2009, 2011 Free Software Foundation, Inc.
2
   Contributed by Richard Henderson <rth@redhat.com>.
3
 
4
   This file is part of the GNU OpenMP Library (libgomp).
5
 
6
   Libgomp is free software; you can redistribute it and/or modify it
7
   under the terms of the GNU General Public License as published by
8
   the Free Software Foundation; either version 3, or (at your option)
9
   any later version.
10
 
11
   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
12
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
14
   more details.
15
 
16
   Under Section 7 of GPL version 3, you are granted additional
17
   permissions described in the GCC Runtime Library Exception, version
18
   3.1, as published by the Free Software Foundation.
19
 
20
   You should have received a copy of the GNU General Public License and
21
   a copy of the GCC Runtime Library Exception along with this program;
22
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23
   <http://www.gnu.org/licenses/>.  */
24
 
25
/* This file contains routines for managing work-share iteration for
   loops whose iteration arithmetic is done in unsigned long long.  */
27
 
28
#include "libgomp.h"
29
#include <stdlib.h>
30
 
31
/* Shorthand for the unsigned long long type in which this file does all
   of its iteration arithmetic.  */
typedef unsigned long long gomp_ull;
32
 
33
/* This function implements the STATIC scheduling method.  The caller should
34
   iterate *pstart <= x < *pend.  Return zero if there are more iterations
35
   to perform; nonzero if not.  Return less than 0 if this thread had
36
   received the absolutely last iteration.  */
37
 
38
int
39
gomp_iter_ull_static_next (gomp_ull *pstart, gomp_ull *pend)
40
{
41
  struct gomp_thread *thr = gomp_thread ();
42
  struct gomp_team *team = thr->ts.team;
43
  struct gomp_work_share *ws = thr->ts.work_share;
44
  unsigned long nthreads = team ? team->nthreads : 1;
45
 
46
  if (thr->ts.static_trip == -1)
47
    return -1;
48
 
49
  /* Quick test for degenerate teams and orphaned constructs.  */
50
  if (nthreads == 1)
51
    {
52
      *pstart = ws->next_ull;
53
      *pend = ws->end_ull;
54
      thr->ts.static_trip = -1;
55
      return ws->next_ull == ws->end_ull;
56
    }
57
 
58
  /* We interpret chunk_size zero as "unspecified", which means that we
59
     should break up the iterations such that each thread makes only one
60
     trip through the outer loop.  */
61
  if (ws->chunk_size_ull == 0)
62
    {
63
      gomp_ull n, q, i, t, s0, e0, s, e;
64
 
65
      if (thr->ts.static_trip > 0)
66
        return 1;
67
 
68
      /* Compute the total number of iterations.  */
69
      if (__builtin_expect (ws->mode, 0) == 0)
70
        n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
71
      else
72
        n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
73
      i = thr->ts.team_id;
74
 
75
      /* Compute the "zero-based" start and end points.  That is, as
76
         if the loop began at zero and incremented by one.  */
77
      q = n / nthreads;
78
      t = n % nthreads;
79
      if (i < t)
80
        {
81
          t = 0;
82
          q++;
83
        }
84
      s0 = q * i + t;
85
      e0 = s0 + q;
86
 
87
      /* Notice when no iterations allocated for this thread.  */
88
      if (s0 >= e0)
89
        {
90
          thr->ts.static_trip = 1;
91
          return 1;
92
        }
93
 
94
      /* Transform these to the actual start and end numbers.  */
95
      s = s0 * ws->incr_ull + ws->next_ull;
96
      e = e0 * ws->incr_ull + ws->next_ull;
97
 
98
      *pstart = s;
99
      *pend = e;
100
      thr->ts.static_trip = (e0 == n ? -1 : 1);
101
      return 0;
102
    }
103
  else
104
    {
105
      gomp_ull n, s0, e0, i, c, s, e;
106
 
107
      /* Otherwise, each thread gets exactly chunk_size iterations
108
         (if available) each time through the loop.  */
109
 
110
      if (__builtin_expect (ws->mode, 0) == 0)
111
        n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
112
      else
113
        n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
114
      i = thr->ts.team_id;
115
      c = ws->chunk_size_ull;
116
 
117
      /* Initial guess is a C sized chunk positioned nthreads iterations
118
         in, offset by our thread number.  */
119
      s0 = (thr->ts.static_trip * (gomp_ull) nthreads + i) * c;
120
      e0 = s0 + c;
121
 
122
      /* Detect overflow.  */
123
      if (s0 >= n)
124
        return 1;
125
      if (e0 > n)
126
        e0 = n;
127
 
128
      /* Transform these to the actual start and end numbers.  */
129
      s = s0 * ws->incr_ull + ws->next_ull;
130
      e = e0 * ws->incr_ull + ws->next_ull;
131
 
132
      *pstart = s;
133
      *pend = e;
134
 
135
      if (e0 == n)
136
        thr->ts.static_trip = -1;
137
      else
138
        thr->ts.static_trip++;
139
      return 0;
140
    }
141
}
142
 
143
 
144
/* This function implements the DYNAMIC scheduling method.  Arguments are
145
   as for gomp_iter_ull_static_next.  This function must be called with
146
   ws->lock held.  */
147
 
148
bool
149
gomp_iter_ull_dynamic_next_locked (gomp_ull *pstart, gomp_ull *pend)
150
{
151
  struct gomp_thread *thr = gomp_thread ();
152
  struct gomp_work_share *ws = thr->ts.work_share;
153
  gomp_ull start, end, chunk, left;
154
 
155
  start = ws->next_ull;
156
  if (start == ws->end_ull)
157
    return false;
158
 
159
  chunk = ws->chunk_size_ull;
160
  left = ws->end_ull - start;
161
  if (__builtin_expect (ws->mode & 2, 0))
162
    {
163
      if (chunk < left)
164
        chunk = left;
165
    }
166
  else
167
    {
168
      if (chunk > left)
169
        chunk = left;
170
    }
171
  end = start + chunk;
172
 
173
  ws->next_ull = end;
174
  *pstart = start;
175
  *pend = end;
176
  return true;
177
}
178
 
179
 
180
#if defined HAVE_SYNC_BUILTINS && defined __LP64__
181
/* Similar, but doesn't require the lock held, and uses compare-and-swap
182
   instead.  Note that the only memory value that changes is ws->next_ull.  */
183
 
184
bool
185
gomp_iter_ull_dynamic_next (gomp_ull *pstart, gomp_ull *pend)
186
{
187
  struct gomp_thread *thr = gomp_thread ();
188
  struct gomp_work_share *ws = thr->ts.work_share;
189
  gomp_ull start, end, nend, chunk;
190
 
191
  end = ws->end_ull;
192
  chunk = ws->chunk_size_ull;
193
 
194
  if (__builtin_expect (ws->mode & 1, 1))
195
    {
196
      gomp_ull tmp = __sync_fetch_and_add (&ws->next_ull, chunk);
197
      if (__builtin_expect (ws->mode & 2, 0) == 0)
198
        {
199
          if (tmp >= end)
200
            return false;
201
          nend = tmp + chunk;
202
          if (nend > end)
203
            nend = end;
204
          *pstart = tmp;
205
          *pend = nend;
206
          return true;
207
        }
208
      else
209
        {
210
          if (tmp <= end)
211
            return false;
212
          nend = tmp + chunk;
213
          if (nend < end)
214
            nend = end;
215
          *pstart = tmp;
216
          *pend = nend;
217
          return true;
218
        }
219
    }
220
 
221
  start = ws->next_ull;
222
  while (1)
223
    {
224
      gomp_ull left = end - start;
225
      gomp_ull tmp;
226
 
227
      if (start == end)
228
        return false;
229
 
230
      if (__builtin_expect (ws->mode & 2, 0))
231
        {
232
          if (chunk < left)
233
            chunk = left;
234
        }
235
      else
236
        {
237
          if (chunk > left)
238
            chunk = left;
239
        }
240
      nend = start + chunk;
241
 
242
      tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
243
      if (__builtin_expect (tmp == start, 1))
244
        break;
245
 
246
      start = tmp;
247
    }
248
 
249
  *pstart = start;
250
  *pend = nend;
251
  return true;
252
}
253
#endif /* HAVE_SYNC_BUILTINS */
254
 
255
 
256
/* This function implements the GUIDED scheduling method.  Arguments are
257
   as for gomp_iter_ull_static_next.  This function must be called with the
258
   work share lock held.  */
259
 
260
bool
261
gomp_iter_ull_guided_next_locked (gomp_ull *pstart, gomp_ull *pend)
262
{
263
  struct gomp_thread *thr = gomp_thread ();
264
  struct gomp_work_share *ws = thr->ts.work_share;
265
  struct gomp_team *team = thr->ts.team;
266
  gomp_ull nthreads = team ? team->nthreads : 1;
267
  gomp_ull n, q;
268
  gomp_ull start, end;
269
 
270
  if (ws->next_ull == ws->end_ull)
271
    return false;
272
 
273
  start = ws->next_ull;
274
  if (__builtin_expect (ws->mode, 0) == 0)
275
    n = (ws->end_ull - start) / ws->incr_ull;
276
  else
277
    n = (start - ws->end_ull) / -ws->incr_ull;
278
  q = (n + nthreads - 1) / nthreads;
279
 
280
  if (q < ws->chunk_size_ull)
281
    q = ws->chunk_size_ull;
282
  if (q <= n)
283
    end = start + q * ws->incr_ull;
284
  else
285
    end = ws->end_ull;
286
 
287
  ws->next_ull = end;
288
  *pstart = start;
289
  *pend = end;
290
  return true;
291
}
292
 
293
#if defined HAVE_SYNC_BUILTINS && defined __LP64__
294
/* Similar, but doesn't require the lock held, and uses compare-and-swap
295
   instead.  Note that the only memory value that changes is ws->next_ull.  */
296
 
297
bool
298
gomp_iter_ull_guided_next (gomp_ull *pstart, gomp_ull *pend)
299
{
300
  struct gomp_thread *thr = gomp_thread ();
301
  struct gomp_work_share *ws = thr->ts.work_share;
302
  struct gomp_team *team = thr->ts.team;
303
  gomp_ull nthreads = team ? team->nthreads : 1;
304
  gomp_ull start, end, nend, incr;
305
  gomp_ull chunk_size;
306
 
307
  start = ws->next_ull;
308
  end = ws->end_ull;
309
  incr = ws->incr_ull;
310
  chunk_size = ws->chunk_size_ull;
311
 
312
  while (1)
313
    {
314
      gomp_ull n, q;
315
      gomp_ull tmp;
316
 
317
      if (start == end)
318
        return false;
319
 
320
      if (__builtin_expect (ws->mode, 0) == 0)
321
        n = (end - start) / incr;
322
      else
323
        n = (start - end) / -incr;
324
      q = (n + nthreads - 1) / nthreads;
325
 
326
      if (q < chunk_size)
327
        q = chunk_size;
328
      if (__builtin_expect (q <= n, 1))
329
        nend = start + q * incr;
330
      else
331
        nend = end;
332
 
333
      tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
334
      if (__builtin_expect (tmp == start, 1))
335
        break;
336
 
337
      start = tmp;
338
    }
339
 
340
  *pstart = start;
341
  *pend = nend;
342
  return true;
343
}
344
#endif /* HAVE_SYNC_BUILTINS */

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.