1 |
35 |
ultra_embe |
// -*- C++ -*-
|
2 |
|
|
|
3 |
|
|
// Copyright (C) 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
|
4 |
|
|
//
|
5 |
|
|
// This file is part of the GNU ISO C++ Library. This library is free
|
6 |
|
|
// software; you can redistribute it and/or modify it under the terms
|
7 |
|
|
// of the GNU General Public License as published by the Free Software
|
8 |
|
|
// Foundation; either version 3, or (at your option) any later
|
9 |
|
|
// version.
|
10 |
|
|
|
11 |
|
|
// This library is distributed in the hope that it will be useful, but
|
12 |
|
|
// WITHOUT ANY WARRANTY; without even the implied warranty of
|
13 |
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14 |
|
|
// General Public License for more details.
|
15 |
|
|
|
16 |
|
|
// Under Section 7 of GPL version 3, you are granted additional
|
17 |
|
|
// permissions described in the GCC Runtime Library Exception, version
|
18 |
|
|
// 3.1, as published by the Free Software Foundation.
|
19 |
|
|
|
20 |
|
|
// You should have received a copy of the GNU General Public License and
|
21 |
|
|
// a copy of the GCC Runtime Library Exception along with this program;
|
22 |
|
|
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
23 |
|
|
// <http://www.gnu.org/licenses/>.
|
24 |
|
|
|
25 |
|
|
/** @file parallel/partial_sum.h
|
26 |
|
|
* @brief Parallel implementation of std::partial_sum(), i.e. prefix
|
27 |
|
|
* sums.
|
28 |
|
|
* This file is a GNU parallel extension to the Standard C++ Library.
|
29 |
|
|
*/
|
30 |
|
|
|
31 |
|
|
// Written by Johannes Singler.
|
32 |
|
|
|
33 |
|
|
#ifndef _GLIBCXX_PARALLEL_PARTIAL_SUM_H
|
34 |
|
|
#define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1
|
35 |
|
|
|
36 |
|
|
#include <omp.h>
|
37 |
|
|
#include <new>
|
38 |
|
|
#include <bits/stl_algobase.h>
|
39 |
|
|
#include <parallel/parallel.h>
|
40 |
|
|
#include <parallel/numericfwd.h>
|
41 |
|
|
|
42 |
|
|
namespace __gnu_parallel
|
43 |
|
|
{
|
44 |
|
|
// Problem: there is no 0-element given.
|
45 |
|
|
|
46 |
|
|
/** @brief Base case prefix sum routine.
|
47 |
|
|
* @param __begin Begin iterator of input sequence.
|
48 |
|
|
* @param __end End iterator of input sequence.
|
49 |
|
|
* @param __result Begin iterator of output sequence.
|
50 |
|
|
* @param __bin_op Associative binary function.
|
51 |
|
|
* @param __value Start value. Must be passed since the neutral
|
52 |
|
|
* element is unknown in general.
|
53 |
|
|
* @return End iterator of output sequence. */
|
54 |
|
|
template<typename _IIter,
|
55 |
|
|
typename _OutputIterator,
|
56 |
|
|
typename _BinaryOperation>
|
57 |
|
|
_OutputIterator
|
58 |
|
|
__parallel_partial_sum_basecase(_IIter __begin, _IIter __end,
|
59 |
|
|
_OutputIterator __result,
|
60 |
|
|
_BinaryOperation __bin_op,
|
61 |
|
|
typename std::iterator_traits <_IIter>::value_type __value)
|
62 |
|
|
{
|
63 |
|
|
if (__begin == __end)
|
64 |
|
|
return __result;
|
65 |
|
|
|
66 |
|
|
while (__begin != __end)
|
67 |
|
|
{
|
68 |
|
|
__value = __bin_op(__value, *__begin);
|
69 |
|
|
*__result = __value;
|
70 |
|
|
++__result;
|
71 |
|
|
++__begin;
|
72 |
|
|
}
|
73 |
|
|
return __result;
|
74 |
|
|
}
|
75 |
|
|
|
76 |
|
|
/** @brief Parallel partial sum implementation, two-phase approach,
|
77 |
|
|
no recursion.
|
78 |
|
|
* @param __begin Begin iterator of input sequence.
|
79 |
|
|
* @param __end End iterator of input sequence.
|
80 |
|
|
* @param __result Begin iterator of output sequence.
|
81 |
|
|
* @param __bin_op Associative binary function.
|
82 |
|
|
* @param __n Length of sequence.
|
83 |
|
|
* @return End iterator of output sequence.
|
84 |
|
|
*/
|
85 |
|
|
template<typename _IIter,
|
86 |
|
|
typename _OutputIterator,
|
87 |
|
|
typename _BinaryOperation>
|
88 |
|
|
_OutputIterator
|
89 |
|
|
__parallel_partial_sum_linear(_IIter __begin, _IIter __end,
|
90 |
|
|
_OutputIterator __result,
|
91 |
|
|
_BinaryOperation __bin_op,
|
92 |
|
|
typename std::iterator_traits<_IIter>::difference_type __n)
|
93 |
|
|
{
|
94 |
|
|
typedef std::iterator_traits<_IIter> _TraitsType;
|
95 |
|
|
typedef typename _TraitsType::value_type _ValueType;
|
96 |
|
|
typedef typename _TraitsType::difference_type _DifferenceType;
|
97 |
|
|
|
98 |
|
|
if (__begin == __end)
|
99 |
|
|
return __result;
|
100 |
|
|
|
101 |
|
|
_ThreadIndex __num_threads =
|
102 |
|
|
std::min<_DifferenceType>(__get_max_threads(), __n - 1);
|
103 |
|
|
|
104 |
|
|
if (__num_threads < 2)
|
105 |
|
|
{
|
106 |
|
|
*__result = *__begin;
|
107 |
|
|
return __parallel_partial_sum_basecase(__begin + 1, __end,
|
108 |
|
|
__result + 1, __bin_op,
|
109 |
|
|
*__begin);
|
110 |
|
|
}
|
111 |
|
|
|
112 |
|
|
_DifferenceType* __borders;
|
113 |
|
|
_ValueType* __sums;
|
114 |
|
|
|
115 |
|
|
const _Settings& __s = _Settings::get();
|
116 |
|
|
|
117 |
|
|
# pragma omp parallel num_threads(__num_threads)
|
118 |
|
|
{
|
119 |
|
|
# pragma omp single
|
120 |
|
|
{
|
121 |
|
|
__num_threads = omp_get_num_threads();
|
122 |
|
|
|
123 |
|
|
__borders = new _DifferenceType[__num_threads + 2];
|
124 |
|
|
|
125 |
|
|
if (__s.partial_sum_dilation == 1.0f)
|
126 |
|
|
__equally_split(__n, __num_threads + 1, __borders);
|
127 |
|
|
else
|
128 |
|
|
{
|
129 |
|
|
_DifferenceType __first_part_length =
|
130 |
|
|
std::max<_DifferenceType>(1,
|
131 |
|
|
__n / (1.0f + __s.partial_sum_dilation * __num_threads));
|
132 |
|
|
_DifferenceType __chunk_length =
|
133 |
|
|
(__n - __first_part_length) / __num_threads;
|
134 |
|
|
_DifferenceType __borderstart =
|
135 |
|
|
__n - __num_threads * __chunk_length;
|
136 |
|
|
__borders[0] = 0;
|
137 |
|
|
for (_ThreadIndex __i = 1; __i < (__num_threads + 1); ++__i)
|
138 |
|
|
{
|
139 |
|
|
__borders[__i] = __borderstart;
|
140 |
|
|
__borderstart += __chunk_length;
|
141 |
|
|
}
|
142 |
|
|
__borders[__num_threads + 1] = __n;
|
143 |
|
|
}
|
144 |
|
|
|
145 |
|
|
__sums = static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
|
146 |
|
|
* __num_threads));
|
147 |
|
|
_OutputIterator __target_end;
|
148 |
|
|
} //single
|
149 |
|
|
|
150 |
|
|
_ThreadIndex __iam = omp_get_thread_num();
|
151 |
|
|
if (__iam == 0)
|
152 |
|
|
{
|
153 |
|
|
*__result = *__begin;
|
154 |
|
|
__parallel_partial_sum_basecase(__begin + 1,
|
155 |
|
|
__begin + __borders[1],
|
156 |
|
|
__result + 1,
|
157 |
|
|
__bin_op, *__begin);
|
158 |
|
|
::new(&(__sums[__iam])) _ValueType(*(__result + __borders[1] - 1));
|
159 |
|
|
}
|
160 |
|
|
else
|
161 |
|
|
{
|
162 |
|
|
::new(&(__sums[__iam]))
|
163 |
|
|
_ValueType(__gnu_parallel::accumulate(
|
164 |
|
|
__begin + __borders[__iam] + 1,
|
165 |
|
|
__begin + __borders[__iam + 1],
|
166 |
|
|
*(__begin + __borders[__iam]),
|
167 |
|
|
__bin_op,
|
168 |
|
|
__gnu_parallel::sequential_tag()));
|
169 |
|
|
}
|
170 |
|
|
|
171 |
|
|
# pragma omp barrier
|
172 |
|
|
|
173 |
|
|
# pragma omp single
|
174 |
|
|
__parallel_partial_sum_basecase(__sums + 1, __sums + __num_threads,
|
175 |
|
|
__sums + 1, __bin_op, __sums[0]);
|
176 |
|
|
|
177 |
|
|
# pragma omp barrier
|
178 |
|
|
|
179 |
|
|
// Still same team.
|
180 |
|
|
__parallel_partial_sum_basecase(__begin + __borders[__iam + 1],
|
181 |
|
|
__begin + __borders[__iam + 2],
|
182 |
|
|
__result + __borders[__iam + 1],
|
183 |
|
|
__bin_op, __sums[__iam]);
|
184 |
|
|
} //parallel
|
185 |
|
|
|
186 |
|
|
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
|
187 |
|
|
__sums[__i].~_ValueType();
|
188 |
|
|
::operator delete(__sums);
|
189 |
|
|
|
190 |
|
|
delete[] __borders;
|
191 |
|
|
|
192 |
|
|
return __result + __n;
|
193 |
|
|
}
|
194 |
|
|
|
195 |
|
|
/** @brief Parallel partial sum front-__end.
|
196 |
|
|
* @param __begin Begin iterator of input sequence.
|
197 |
|
|
* @param __end End iterator of input sequence.
|
198 |
|
|
* @param __result Begin iterator of output sequence.
|
199 |
|
|
* @param __bin_op Associative binary function.
|
200 |
|
|
* @return End iterator of output sequence. */
|
201 |
|
|
template<typename _IIter,
|
202 |
|
|
typename _OutputIterator,
|
203 |
|
|
typename _BinaryOperation>
|
204 |
|
|
_OutputIterator
|
205 |
|
|
__parallel_partial_sum(_IIter __begin, _IIter __end,
|
206 |
|
|
_OutputIterator __result, _BinaryOperation __bin_op)
|
207 |
|
|
{
|
208 |
|
|
_GLIBCXX_CALL(__begin - __end)
|
209 |
|
|
|
210 |
|
|
typedef std::iterator_traits<_IIter> _TraitsType;
|
211 |
|
|
typedef typename _TraitsType::value_type _ValueType;
|
212 |
|
|
typedef typename _TraitsType::difference_type _DifferenceType;
|
213 |
|
|
|
214 |
|
|
_DifferenceType __n = __end - __begin;
|
215 |
|
|
|
216 |
|
|
switch (_Settings::get().partial_sum_algorithm)
|
217 |
|
|
{
|
218 |
|
|
case LINEAR:
|
219 |
|
|
// Need an initial offset.
|
220 |
|
|
return __parallel_partial_sum_linear(__begin, __end, __result,
|
221 |
|
|
__bin_op, __n);
|
222 |
|
|
default:
|
223 |
|
|
// Partial_sum algorithm not implemented.
|
224 |
|
|
_GLIBCXX_PARALLEL_ASSERT(0);
|
225 |
|
|
return __result + __n;
|
226 |
|
|
}
|
227 |
|
|
}
|
228 |
|
|
}
|
229 |
|
|
|
230 |
|
|
#endif /* _GLIBCXX_PARALLEL_PARTIAL_SUM_H */
|