OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-stable/] [gcc-4.5.1/] [libgomp/] [iter.c] - Diff between revs 816 and 826

Go to most recent revision | Only display areas with differences | Details | Blame | View Log

Rev 816 Rev 826
/* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU OpenMP Library (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains routines for managing work-share iteration, both
   for loops and sections.  */
 
 
#include "libgomp.h"
#include <stdlib.h>
 
 
 
 
/* This function implements the STATIC scheduling method.  The caller should
/* This function implements the STATIC scheduling method.  The caller should
   iterate *pstart <= x < *pend.  Return zero if there are more iterations
   iterate *pstart <= x < *pend.  Return zero if there are more iterations
   to perform; nonzero if not.  Return less than 0 if this thread had
   to perform; nonzero if not.  Return less than 0 if this thread had
   received the absolutely last iteration.  */
   received the absolutely last iteration.  */
 
 
int
int
gomp_iter_static_next (long *pstart, long *pend)
gomp_iter_static_next (long *pstart, long *pend)
{
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_work_share *ws = thr->ts.work_share;
  unsigned long nthreads = team ? team->nthreads : 1;
  unsigned long nthreads = team ? team->nthreads : 1;
 
 
  if (thr->ts.static_trip == -1)
  if (thr->ts.static_trip == -1)
    return -1;
    return -1;
 
 
  /* Quick test for degenerate teams and orphaned constructs.  */
  /* Quick test for degenerate teams and orphaned constructs.  */
  if (nthreads == 1)
  if (nthreads == 1)
    {
    {
      *pstart = ws->next;
      *pstart = ws->next;
      *pend = ws->end;
      *pend = ws->end;
      thr->ts.static_trip = -1;
      thr->ts.static_trip = -1;
      return ws->next == ws->end;
      return ws->next == ws->end;
    }
    }
 
 
  /* We interpret chunk_size zero as "unspecified", which means that we
  /* We interpret chunk_size zero as "unspecified", which means that we
     should break up the iterations such that each thread makes only one
     should break up the iterations such that each thread makes only one
     trip through the outer loop.  */
     trip through the outer loop.  */
  if (ws->chunk_size == 0)
  if (ws->chunk_size == 0)
    {
    {
      unsigned long n, q, i;
      unsigned long n, q, i;
      unsigned long s0, e0;
      unsigned long s0, e0;
      long s, e;
      long s, e;
 
 
      if (thr->ts.static_trip > 0)
      if (thr->ts.static_trip > 0)
        return 1;
        return 1;
 
 
      /* Compute the total number of iterations.  */
      /* Compute the total number of iterations.  */
      s = ws->incr + (ws->incr > 0 ? -1 : 1);
      s = ws->incr + (ws->incr > 0 ? -1 : 1);
      n = (ws->end - ws->next + s) / ws->incr;
      n = (ws->end - ws->next + s) / ws->incr;
      i = thr->ts.team_id;
      i = thr->ts.team_id;
 
 
      /* Compute the "zero-based" start and end points.  That is, as
      /* Compute the "zero-based" start and end points.  That is, as
         if the loop began at zero and incremented by one.  */
         if the loop began at zero and incremented by one.  */
      q = n / nthreads;
      q = n / nthreads;
      q += (q * nthreads != n);
      q += (q * nthreads != n);
      s0 = q * i;
      s0 = q * i;
      e0 = s0 + q;
      e0 = s0 + q;
      if (e0 > n)
      if (e0 > n)
        e0 = n;
        e0 = n;
 
 
      /* Notice when no iterations allocated for this thread.  */
      /* Notice when no iterations allocated for this thread.  */
      if (s0 >= e0)
      if (s0 >= e0)
        {
        {
          thr->ts.static_trip = 1;
          thr->ts.static_trip = 1;
          return 1;
          return 1;
        }
        }
 
 
      /* Transform these to the actual start and end numbers.  */
      /* Transform these to the actual start and end numbers.  */
      s = (long)s0 * ws->incr + ws->next;
      s = (long)s0 * ws->incr + ws->next;
      e = (long)e0 * ws->incr + ws->next;
      e = (long)e0 * ws->incr + ws->next;
 
 
      *pstart = s;
      *pstart = s;
      *pend = e;
      *pend = e;
      thr->ts.static_trip = (e0 == n ? -1 : 1);
      thr->ts.static_trip = (e0 == n ? -1 : 1);
      return 0;
      return 0;
    }
    }
  else
  else
    {
    {
      unsigned long n, s0, e0, i, c;
      unsigned long n, s0, e0, i, c;
      long s, e;
      long s, e;
 
 
      /* Otherwise, each thread gets exactly chunk_size iterations
      /* Otherwise, each thread gets exactly chunk_size iterations
         (if available) each time through the loop.  */
         (if available) each time through the loop.  */
 
 
      s = ws->incr + (ws->incr > 0 ? -1 : 1);
      s = ws->incr + (ws->incr > 0 ? -1 : 1);
      n = (ws->end - ws->next + s) / ws->incr;
      n = (ws->end - ws->next + s) / ws->incr;
      i = thr->ts.team_id;
      i = thr->ts.team_id;
      c = ws->chunk_size;
      c = ws->chunk_size;
 
 
      /* Initial guess is a C sized chunk positioned nthreads iterations
      /* Initial guess is a C sized chunk positioned nthreads iterations
         in, offset by our thread number.  */
         in, offset by our thread number.  */
      s0 = (thr->ts.static_trip * nthreads + i) * c;
      s0 = (thr->ts.static_trip * nthreads + i) * c;
      e0 = s0 + c;
      e0 = s0 + c;
 
 
      /* Detect overflow.  */
      /* Detect overflow.  */
      if (s0 >= n)
      if (s0 >= n)
        return 1;
        return 1;
      if (e0 > n)
      if (e0 > n)
        e0 = n;
        e0 = n;
 
 
      /* Transform these to the actual start and end numbers.  */
      /* Transform these to the actual start and end numbers.  */
      s = (long)s0 * ws->incr + ws->next;
      s = (long)s0 * ws->incr + ws->next;
      e = (long)e0 * ws->incr + ws->next;
      e = (long)e0 * ws->incr + ws->next;
 
 
      *pstart = s;
      *pstart = s;
      *pend = e;
      *pend = e;
 
 
      if (e0 == n)
      if (e0 == n)
        thr->ts.static_trip = -1;
        thr->ts.static_trip = -1;
      else
      else
        thr->ts.static_trip++;
        thr->ts.static_trip++;
      return 0;
      return 0;
    }
    }
}
}
 
 
 
 
/* This function implements the DYNAMIC scheduling method.  Arguments are
/* This function implements the DYNAMIC scheduling method.  Arguments are
   as for gomp_iter_static_next.  This function must be called with ws->lock
   as for gomp_iter_static_next.  This function must be called with ws->lock
   held.  */
   held.  */
 
 
bool
bool
gomp_iter_dynamic_next_locked (long *pstart, long *pend)
gomp_iter_dynamic_next_locked (long *pstart, long *pend)
{
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_work_share *ws = thr->ts.work_share;
  long start, end, chunk, left;
  long start, end, chunk, left;
 
 
  start = ws->next;
  start = ws->next;
  if (start == ws->end)
  if (start == ws->end)
    return false;
    return false;
 
 
  chunk = ws->chunk_size;
  chunk = ws->chunk_size;
  left = ws->end - start;
  left = ws->end - start;
  if (ws->incr < 0)
  if (ws->incr < 0)
    {
    {
      if (chunk < left)
      if (chunk < left)
        chunk = left;
        chunk = left;
    }
    }
  else
  else
    {
    {
      if (chunk > left)
      if (chunk > left)
        chunk = left;
        chunk = left;
    }
    }
  end = start + chunk;
  end = start + chunk;
 
 
  ws->next = end;
  ws->next = end;
  *pstart = start;
  *pstart = start;
  *pend = end;
  *pend = end;
  return true;
  return true;
}
}
 
 
 
 
#ifdef HAVE_SYNC_BUILTINS
/* Similar, but doesn't require the lock held, and uses compare-and-swap
   instead.  Note that the only memory value that changes is ws->next.  */

bool
gomp_iter_dynamic_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  long start, end, nend, chunk, incr;

  end = ws->end;
  incr = ws->incr;
  chunk = ws->chunk_size;

  /* ws->mode nonzero indicates the fast path is safe: ws->next may be
     advanced past ws->end without overflow, so a single fetch-and-add
     suffices and the claimed range is clamped afterwards.  */
  if (__builtin_expect (ws->mode, 1))
    {
      long tmp = __sync_fetch_and_add (&ws->next, chunk);
      if (incr > 0)
        {
          if (tmp >= end)
            return false;
          nend = tmp + chunk;
          if (nend > end)
            nend = end;
          *pstart = tmp;
          *pend = nend;
          return true;
        }
      else
        {
          if (tmp <= end)
            return false;
          nend = tmp + chunk;
          if (nend < end)
            nend = end;
          *pstart = tmp;
          *pend = nend;
          return true;
        }
    }

  /* Slow path: claim a chunk with a CAS loop, clamping the chunk to
     the remaining iterations before each attempt.  */
  start = ws->next;
  while (1)
    {
      long left = end - start;
      long tmp;

      if (start == end)
        return false;

      if (incr < 0)
        {
          if (chunk < left)
            chunk = left;
        }
      else
        {
          if (chunk > left)
            chunk = left;
        }
      nend = start + chunk;

      tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
      if (__builtin_expect (tmp == start, 1))
        break;

      /* Another thread advanced ws->next; retry from its value.  */
      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
#endif /* HAVE_SYNC_BUILTINS */
 
 
 
 
/* This function implements the GUIDED scheduling method.  Arguments are
/* This function implements the GUIDED scheduling method.  Arguments are
   as for gomp_iter_static_next.  This function must be called with the
   as for gomp_iter_static_next.  This function must be called with the
   work share lock held.  */
   work share lock held.  */
 
 
bool
bool
gomp_iter_guided_next_locked (long *pstart, long *pend)
gomp_iter_guided_next_locked (long *pstart, long *pend)
{
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_team *team = thr->ts.team;
  struct gomp_team *team = thr->ts.team;
  unsigned long nthreads = team ? team->nthreads : 1;
  unsigned long nthreads = team ? team->nthreads : 1;
  unsigned long n, q;
  unsigned long n, q;
  long start, end;
  long start, end;
 
 
  if (ws->next == ws->end)
  if (ws->next == ws->end)
    return false;
    return false;
 
 
  start = ws->next;
  start = ws->next;
  n = (ws->end - start) / ws->incr;
  n = (ws->end - start) / ws->incr;
  q = (n + nthreads - 1) / nthreads;
  q = (n + nthreads - 1) / nthreads;
 
 
  if (q < ws->chunk_size)
  if (q < ws->chunk_size)
    q = ws->chunk_size;
    q = ws->chunk_size;
  if (q <= n)
  if (q <= n)
    end = start + q * ws->incr;
    end = start + q * ws->incr;
  else
  else
    end = ws->end;
    end = ws->end;
 
 
  ws->next = end;
  ws->next = end;
  *pstart = start;
  *pstart = start;
  *pend = end;
  *pend = end;
  return true;
  return true;
}
}
 
 
#ifdef HAVE_SYNC_BUILTINS
/* Similar, but doesn't require the lock held, and uses compare-and-swap
   instead.  Note that the only memory value that changes is ws->next.  */

bool
gomp_iter_guided_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_team *team = thr->ts.team;
  unsigned long nthreads = team ? team->nthreads : 1;
  long start, end, nend, incr;
  unsigned long chunk_size;

  start = ws->next;
  end = ws->end;
  incr = ws->incr;
  chunk_size = ws->chunk_size;

  while (1)
    {
      unsigned long n, q;
      long tmp;

      if (start == end)
        return false;

      /* Guided: take (remaining / nthreads) iterations, rounded up,
         but never less than chunk_size.  */
      n = (end - start) / incr;
      q = (n + nthreads - 1) / nthreads;

      if (q < chunk_size)
        q = chunk_size;
      if (__builtin_expect (q <= n, 1))
        nend = start + q * incr;
      else
        nend = end;

      tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
      if (__builtin_expect (tmp == start, 1))
        break;

      /* Another thread advanced ws->next; retry from its value.  */
      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
#endif /* HAVE_SYNC_BUILTINS */
 
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.