OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libstdc++-v3/] [include/] [parallel/] [partial_sum.h] - Blame information for rev 750

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 742 jeremybenn
// -*- C++ -*-
2
 
3
// Copyright (C) 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
4
//
5
// This file is part of the GNU ISO C++ Library.  This library is free
6
// software; you can redistribute it and/or modify it under the terms
7
// of the GNU General Public License as published by the Free Software
8
// Foundation; either version 3, or (at your option) any later
9
// version.
10
 
11
// This library is distributed in the hope that it will be useful, but
12
// WITHOUT ANY WARRANTY; without even the implied warranty of
13
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
// General Public License for more details.
15
 
16
// Under Section 7 of GPL version 3, you are granted additional
17
// permissions described in the GCC Runtime Library Exception, version
18
// 3.1, as published by the Free Software Foundation.
19
 
20
// You should have received a copy of the GNU General Public License and
21
// a copy of the GCC Runtime Library Exception along with this program;
22
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23
// <http://www.gnu.org/licenses/>.
24
 
25
/** @file parallel/partial_sum.h
26
 *  @brief Parallel implementation of std::partial_sum(), i.e. prefix
27
*  sums.
28
 *  This file is a GNU parallel extension to the Standard C++ Library.
29
 */
30
 
31
// Written by Johannes Singler.
32
 
33
#ifndef _GLIBCXX_PARALLEL_PARTIAL_SUM_H
34
#define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1
35
 
36
#include <omp.h>
37
#include <new>
38
#include <bits/stl_algobase.h>
39
#include <parallel/parallel.h>
40
#include <parallel/numericfwd.h>
41
 
42
namespace __gnu_parallel
43
{
44
  // Problem: there is no 0-element given.
45
 
46
  /** @brief Base case prefix sum routine.
47
   *  @param __begin Begin iterator of input sequence.
48
   *  @param __end End iterator of input sequence.
49
   *  @param __result Begin iterator of output sequence.
50
   *  @param __bin_op Associative binary function.
51
   *  @param __value Start value. Must be passed since the neutral
52
   *  element is unknown in general.
53
   *  @return End iterator of output sequence. */
54
  template<typename _IIter,
55
           typename _OutputIterator,
56
           typename _BinaryOperation>
57
    _OutputIterator
58
    __parallel_partial_sum_basecase(_IIter __begin, _IIter __end,
59
                                    _OutputIterator __result,
60
                                    _BinaryOperation __bin_op,
61
      typename std::iterator_traits <_IIter>::value_type __value)
62
    {
63
      if (__begin == __end)
64
        return __result;
65
 
66
      while (__begin != __end)
67
        {
68
          __value = __bin_op(__value, *__begin);
69
          *__result = __value;
70
          ++__result;
71
          ++__begin;
72
        }
73
      return __result;
74
    }
75
 
76
  /** @brief Parallel partial sum implementation, two-phase approach,
77
      no recursion.
78
      *  @param __begin Begin iterator of input sequence.
79
      *  @param __end End iterator of input sequence.
80
      *  @param __result Begin iterator of output sequence.
81
      *  @param __bin_op Associative binary function.
82
      *  @param __n Length of sequence.
83
      *  @return End iterator of output sequence.
84
      */
85
  template<typename _IIter,
86
           typename _OutputIterator,
87
           typename _BinaryOperation>
88
    _OutputIterator
89
    __parallel_partial_sum_linear(_IIter __begin, _IIter __end,
90
                                  _OutputIterator __result,
91
                                  _BinaryOperation __bin_op,
92
      typename std::iterator_traits<_IIter>::difference_type __n)
93
    {
94
      typedef std::iterator_traits<_IIter> _TraitsType;
95
      typedef typename _TraitsType::value_type _ValueType;
96
      typedef typename _TraitsType::difference_type _DifferenceType;
97
 
98
      if (__begin == __end)
99
        return __result;
100
 
101
      _ThreadIndex __num_threads =
102
        std::min<_DifferenceType>(__get_max_threads(), __n - 1);
103
 
104
      if (__num_threads < 2)
105
        {
106
          *__result = *__begin;
107
          return __parallel_partial_sum_basecase(__begin + 1, __end,
108
                                                 __result + 1, __bin_op,
109
                                                 *__begin);
110
        }
111
 
112
      _DifferenceType* __borders;
113
      _ValueType* __sums;
114
 
115
      const _Settings& __s = _Settings::get();
116
 
117
#     pragma omp parallel num_threads(__num_threads)
118
      {
119
#       pragma omp single
120
        {
121
          __num_threads = omp_get_num_threads();
122
 
123
          __borders = new _DifferenceType[__num_threads + 2];
124
 
125
          if (__s.partial_sum_dilation == 1.0f)
126
            __equally_split(__n, __num_threads + 1, __borders);
127
          else
128
            {
129
              _DifferenceType __first_part_length =
130
                  std::max<_DifferenceType>(1,
131
                    __n / (1.0f + __s.partial_sum_dilation * __num_threads));
132
              _DifferenceType __chunk_length =
133
                  (__n - __first_part_length) / __num_threads;
134
              _DifferenceType __borderstart =
135
                  __n - __num_threads * __chunk_length;
136
              __borders[0] = 0;
137
              for (_ThreadIndex __i = 1; __i < (__num_threads + 1); ++__i)
138
                {
139
                  __borders[__i] = __borderstart;
140
                  __borderstart += __chunk_length;
141
                }
142
              __borders[__num_threads + 1] = __n;
143
            }
144
 
145
          __sums = static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
146
                                                           * __num_threads));
147
          _OutputIterator __target_end;
148
        } //single
149
 
150
        _ThreadIndex __iam = omp_get_thread_num();
151
        if (__iam == 0)
152
          {
153
            *__result = *__begin;
154
            __parallel_partial_sum_basecase(__begin + 1,
155
                                            __begin + __borders[1],
156
                                            __result + 1,
157
                                            __bin_op, *__begin);
158
            ::new(&(__sums[__iam])) _ValueType(*(__result + __borders[1] - 1));
159
          }
160
        else
161
          {
162
            ::new(&(__sums[__iam]))
163
              _ValueType(__gnu_parallel::accumulate(
164
                                         __begin + __borders[__iam] + 1,
165
                                         __begin + __borders[__iam + 1],
166
                                         *(__begin + __borders[__iam]),
167
                                         __bin_op,
168
                                         __gnu_parallel::sequential_tag()));
169
          }
170
 
171
#       pragma omp barrier
172
 
173
#       pragma omp single
174
        __parallel_partial_sum_basecase(__sums + 1, __sums + __num_threads,
175
                                        __sums + 1, __bin_op, __sums[0]);
176
 
177
#       pragma omp barrier
178
 
179
        // Still same team.
180
        __parallel_partial_sum_basecase(__begin + __borders[__iam + 1],
181
                                        __begin + __borders[__iam + 2],
182
                                        __result + __borders[__iam + 1],
183
                                        __bin_op, __sums[__iam]);
184
      } //parallel
185
 
186
      for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
187
        __sums[__i].~_ValueType();
188
      ::operator delete(__sums);
189
 
190
      delete[] __borders;
191
 
192
      return __result + __n;
193
    }
194
 
195
  /** @brief Parallel partial sum front-__end.
196
   *  @param __begin Begin iterator of input sequence.
197
   *  @param __end End iterator of input sequence.
198
   *  @param __result Begin iterator of output sequence.
199
   *  @param __bin_op Associative binary function.
200
   *  @return End iterator of output sequence. */
201
  template<typename _IIter,
202
           typename _OutputIterator,
203
           typename _BinaryOperation>
204
    _OutputIterator
205
    __parallel_partial_sum(_IIter __begin, _IIter __end,
206
                           _OutputIterator __result, _BinaryOperation __bin_op)
207
    {
208
      _GLIBCXX_CALL(__begin - __end)
209
 
210
      typedef std::iterator_traits<_IIter> _TraitsType;
211
      typedef typename _TraitsType::value_type _ValueType;
212
      typedef typename _TraitsType::difference_type _DifferenceType;
213
 
214
      _DifferenceType __n = __end - __begin;
215
 
216
      switch (_Settings::get().partial_sum_algorithm)
217
        {
218
        case LINEAR:
219
          // Need an initial offset.
220
          return __parallel_partial_sum_linear(__begin, __end, __result,
221
                                               __bin_op, __n);
222
        default:
223
          // Partial_sum algorithm not implemented.
224
          _GLIBCXX_PARALLEL_ASSERT(0);
225
          return __result + __n;
226
        }
227
    }
228
}
229
 
230
#endif /* _GLIBCXX_PARALLEL_PARTIAL_SUM_H */

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.