OpenCores
URL https://opencores.org/ocsvn/altor32/altor32/trunk

Subversion Repositories altor32

[/] [altor32/] [trunk/] [gcc-x64/] [or1knd-elf/] [or1knd-elf/] [include/] [c++/] [4.8.0/] [parallel/] [multiway_mergesort.h] - Blame information for rev 35

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 35 ultra_embe
// -*- C++ -*-
2
 
3
// Copyright (C) 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
4
//
5
// This file is part of the GNU ISO C++ Library.  This library is free
6
// software; you can redistribute it and/or modify it under the terms
7
// of the GNU General Public License as published by the Free Software
8
// Foundation; either version 3, or (at your option) any later
9
// version.
10
 
11
// This library is distributed in the hope that it will be useful, but
12
// WITHOUT ANY WARRANTY; without even the implied warranty of
13
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
// General Public License for more details.
15
 
16
// Under Section 7 of GPL version 3, you are granted additional
17
// permissions described in the GCC Runtime Library Exception, version
18
// 3.1, as published by the Free Software Foundation.
19
 
20
// You should have received a copy of the GNU General Public License and
21
// a copy of the GCC Runtime Library Exception along with this program;
22
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23
// <http://www.gnu.org/licenses/>.
24
 
25
/** @file parallel/multiway_mergesort.h
26
 *  @brief Parallel multiway merge sort.
27
 *  This file is a GNU parallel extension to the Standard C++ Library.
28
 */
29
 
30
// Written by Johannes Singler.
31
 
32
#ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H
33
#define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1
34
 
35
#include <vector>
36
 
37
#include <parallel/basic_iterator.h>
38
#include <bits/stl_algo.h>
39
#include <parallel/parallel.h>
40
#include <parallel/multiway_merge.h>
41
 
42
namespace __gnu_parallel
43
{
44
  /** @brief Subsequence description. */
45
  template<typename _DifferenceTp>
46
    struct _Piece
47
    {
48
      typedef _DifferenceTp _DifferenceType;
49
 
50
      /** @brief Begin of subsequence. */
51
      _DifferenceType _M_begin;
52
 
53
      /** @brief End of subsequence. */
54
      _DifferenceType _M_end;
55
    };
56
 
57
  /** @brief Data accessed by all threads.
58
   *
59
   *  PMWMS = parallel multiway mergesort */
60
  template<typename _RAIter>
61
    struct _PMWMSSortingData
62
    {
63
      typedef std::iterator_traits<_RAIter> _TraitsType;
64
      typedef typename _TraitsType::value_type _ValueType;
65
      typedef typename _TraitsType::difference_type _DifferenceType;
66
 
67
      /** @brief Number of threads involved. */
68
      _ThreadIndex _M_num_threads;
69
 
70
      /** @brief Input __begin. */
71
      _RAIter _M_source;
72
 
73
      /** @brief Start indices, per thread. */
74
      _DifferenceType* _M_starts;
75
 
76
      /** @brief Storage in which to sort. */
77
      _ValueType** _M_temporary;
78
 
79
      /** @brief Samples. */
80
      _ValueType* _M_samples;
81
 
82
      /** @brief Offsets to add to the found positions. */
83
      _DifferenceType* _M_offsets;
84
 
85
      /** @brief Pieces of data to merge @c [thread][__sequence] */
86
      std::vector<_Piece<_DifferenceType> >* _M_pieces;
87
  };
88
 
89
  /**
90
   *  @brief Select _M_samples from a sequence.
91
   *  @param __sd Pointer to algorithm data. _Result will be placed in
92
   *  @c __sd->_M_samples.
93
   *  @param __num_samples Number of _M_samples to select.
94
   */
95
  template<typename _RAIter, typename _DifferenceTp>
96
    void
97
    __determine_samples(_PMWMSSortingData<_RAIter>* __sd,
98
                        _DifferenceTp __num_samples)
99
    {
100
      typedef std::iterator_traits<_RAIter> _TraitsType;
101
      typedef typename _TraitsType::value_type _ValueType;
102
      typedef _DifferenceTp _DifferenceType;
103
 
104
      _ThreadIndex __iam = omp_get_thread_num();
105
 
106
      _DifferenceType* __es = new _DifferenceType[__num_samples + 2];
107
 
108
      __equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam],
109
                      __num_samples + 1, __es);
110
 
111
      for (_DifferenceType __i = 0; __i < __num_samples; ++__i)
112
        ::new(&(__sd->_M_samples[__iam * __num_samples + __i]))
113
            _ValueType(__sd->_M_source[__sd->_M_starts[__iam]
114
                                       + __es[__i + 1]]);
115
 
116
      delete[] __es;
117
    }
118
 
119
  /** @brief Split consistently. */
120
  template<bool __exact, typename _RAIter,
121
           typename _Compare, typename _SortingPlacesIterator>
122
    struct _SplitConsistently
123
    { };
124
 
125
  /** @brief Split by exact splitting. */
126
  template<typename _RAIter, typename _Compare,
127
           typename _SortingPlacesIterator>
128
    struct _SplitConsistently<true, _RAIter, _Compare, _SortingPlacesIterator>
129
    {
130
      void
131
      operator()(const _ThreadIndex __iam,
132
                 _PMWMSSortingData<_RAIter>* __sd,
133
                 _Compare& __comp,
134
                 const typename
135
                 std::iterator_traits<_RAIter>::difference_type
136
                 __num_samples) const
137
      {
138
#       pragma omp barrier
139
 
140
        std::vector<std::pair<_SortingPlacesIterator,
141
                              _SortingPlacesIterator> >
142
          __seqs(__sd->_M_num_threads);
143
        for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
144
          __seqs[__s] = std::make_pair(__sd->_M_temporary[__s],
145
                                       __sd->_M_temporary[__s]
146
                                       + (__sd->_M_starts[__s + 1]
147
                                          - __sd->_M_starts[__s]));
148
 
149
        std::vector<_SortingPlacesIterator> __offsets(__sd->_M_num_threads);
150
 
151
        // if not last thread
152
        if (__iam < __sd->_M_num_threads - 1)
153
          multiseq_partition(__seqs.begin(), __seqs.end(),
154
                             __sd->_M_starts[__iam + 1], __offsets.begin(),
155
                             __comp);
156
 
157
        for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
158
          {
159
            // for each sequence
160
            if (__iam < (__sd->_M_num_threads - 1))
161
              __sd->_M_pieces[__iam][__seq]._M_end
162
                = __offsets[__seq] - __seqs[__seq].first;
163
            else
164
              // very end of this sequence
165
              __sd->_M_pieces[__iam][__seq]._M_end =
166
                __sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq];
167
          }
168
 
169
#       pragma omp barrier
170
 
171
        for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
172
          {
173
            // For each sequence.
174
            if (__iam > 0)
175
              __sd->_M_pieces[__iam][__seq]._M_begin =
176
                __sd->_M_pieces[__iam - 1][__seq]._M_end;
177
            else
178
              // Absolute beginning.
179
              __sd->_M_pieces[__iam][__seq]._M_begin = 0;
180
          }
181
      }
182
  };
183
 
184
  /** @brief Split by sampling. */
185
  template<typename _RAIter, typename _Compare,
186
           typename _SortingPlacesIterator>
187
    struct _SplitConsistently<false, _RAIter, _Compare, _SortingPlacesIterator>
188
    {
189
      void
190
      operator()(const _ThreadIndex __iam,
191
                 _PMWMSSortingData<_RAIter>* __sd,
192
                 _Compare& __comp,
193
                 const typename
194
                 std::iterator_traits<_RAIter>::difference_type
195
                 __num_samples) const
196
      {
197
        typedef std::iterator_traits<_RAIter> _TraitsType;
198
        typedef typename _TraitsType::value_type _ValueType;
199
        typedef typename _TraitsType::difference_type _DifferenceType;
200
 
201
        __determine_samples(__sd, __num_samples);
202
 
203
#       pragma omp barrier
204
 
205
#       pragma omp single
206
        __gnu_sequential::sort(__sd->_M_samples,
207
                               __sd->_M_samples
208
                               + (__num_samples * __sd->_M_num_threads),
209
                               __comp);
210
 
211
#       pragma omp barrier
212
 
213
        for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
214
          {
215
            // For each sequence.
216
            if (__num_samples * __iam > 0)
217
              __sd->_M_pieces[__iam][__s]._M_begin =
218
                std::lower_bound(__sd->_M_temporary[__s],
219
                                 __sd->_M_temporary[__s]
220
                                 + (__sd->_M_starts[__s + 1]
221
                                    - __sd->_M_starts[__s]),
222
                                 __sd->_M_samples[__num_samples * __iam],
223
                                 __comp)
224
                - __sd->_M_temporary[__s];
225
            else
226
              // Absolute beginning.
227
              __sd->_M_pieces[__iam][__s]._M_begin = 0;
228
 
229
            if ((__num_samples * (__iam + 1)) <
230
                (__num_samples * __sd->_M_num_threads))
231
              __sd->_M_pieces[__iam][__s]._M_end =
232
                std::lower_bound(__sd->_M_temporary[__s],
233
                                 __sd->_M_temporary[__s]
234
                                 + (__sd->_M_starts[__s + 1]
235
                                    - __sd->_M_starts[__s]),
236
                                 __sd->_M_samples[__num_samples * (__iam + 1)],
237
                                 __comp)
238
                - __sd->_M_temporary[__s];
239
            else
240
              // Absolute end.
241
              __sd->_M_pieces[__iam][__s]._M_end = (__sd->_M_starts[__s + 1]
242
                                                    - __sd->_M_starts[__s]);
243
          }
244
      }
245
  };
246
 
247
  template<bool __stable, typename _RAIter, typename _Compare>
248
    struct __possibly_stable_sort
249
    { };
250
 
251
  template<typename _RAIter, typename _Compare>
252
    struct __possibly_stable_sort<true, _RAIter, _Compare>
253
    {
254
      void operator()(const _RAIter& __begin,
255
                      const _RAIter& __end, _Compare& __comp) const
256
      { __gnu_sequential::stable_sort(__begin, __end, __comp); }
257
    };
258
 
259
  template<typename _RAIter, typename _Compare>
260
    struct __possibly_stable_sort<false, _RAIter, _Compare>
261
    {
262
      void operator()(const _RAIter __begin,
263
                      const _RAIter __end, _Compare& __comp) const
264
      { __gnu_sequential::sort(__begin, __end, __comp); }
265
    };
266
 
267
  template<bool __stable, typename Seq_RAIter,
268
           typename _RAIter, typename _Compare,
269
           typename DiffType>
270
    struct __possibly_stable_multiway_merge
271
    { };
272
 
273
  template<typename Seq_RAIter, typename _RAIter,
274
           typename _Compare, typename _DiffType>
275
    struct __possibly_stable_multiway_merge<true, Seq_RAIter,
276
                                            _RAIter, _Compare, _DiffType>
277
    {
278
      void operator()(const Seq_RAIter& __seqs_begin,
279
                      const Seq_RAIter& __seqs_end,
280
                      const _RAIter& __target,
281
                      _Compare& __comp,
282
                      _DiffType __length_am) const
283
      { stable_multiway_merge(__seqs_begin, __seqs_end, __target,
284
                              __length_am, __comp, sequential_tag()); }
285
    };
286
 
287
  template<typename Seq_RAIter, typename _RAIter,
288
           typename _Compare, typename _DiffType>
289
    struct __possibly_stable_multiway_merge<false, Seq_RAIter,
290
                                            _RAIter, _Compare, _DiffType>
291
    {
292
      void operator()(const Seq_RAIter& __seqs_begin,
293
                      const Seq_RAIter& __seqs_end,
294
                      const _RAIter& __target,
295
                      _Compare& __comp,
296
                      _DiffType __length_am) const
297
      { multiway_merge(__seqs_begin, __seqs_end, __target, __length_am,
298
                       __comp, sequential_tag()); }
299
    };
300
 
301
  /** @brief PMWMS code executed by each thread.
302
   *  @param __sd Pointer to algorithm data.
303
   *  @param __comp Comparator.
304
   */
305
  template<bool __stable, bool __exact, typename _RAIter,
306
           typename _Compare>
307
    void
308
    parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd,
309
                          _Compare& __comp)
310
    {
311
      typedef std::iterator_traits<_RAIter> _TraitsType;
312
      typedef typename _TraitsType::value_type _ValueType;
313
      typedef typename _TraitsType::difference_type _DifferenceType;
314
 
315
      _ThreadIndex __iam = omp_get_thread_num();
316
 
317
      // Length of this thread's chunk, before merging.
318
      _DifferenceType __length_local =
319
        __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam];
320
 
321
      // Sort in temporary storage, leave space for sentinel.
322
 
323
      typedef _ValueType* _SortingPlacesIterator;
324
 
325
      __sd->_M_temporary[__iam] =
326
        static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
327
                                                * (__length_local + 1)));
328
 
329
      // Copy there.
330
      std::uninitialized_copy(__sd->_M_source + __sd->_M_starts[__iam],
331
                              __sd->_M_source + __sd->_M_starts[__iam]
332
                              + __length_local,
333
                              __sd->_M_temporary[__iam]);
334
 
335
      __possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>()
336
        (__sd->_M_temporary[__iam],
337
         __sd->_M_temporary[__iam] + __length_local,
338
         __comp);
339
 
340
      // Invariant: locally sorted subsequence in sd->_M_temporary[__iam],
341
      // __sd->_M_temporary[__iam] + __length_local.
342
 
343
      // No barrier here: Synchronization is done by the splitting routine.
344
 
345
      _DifferenceType __num_samples =
346
        _Settings::get().sort_mwms_oversampling * __sd->_M_num_threads - 1;
347
      _SplitConsistently<__exact, _RAIter, _Compare, _SortingPlacesIterator>()
348
        (__iam, __sd, __comp, __num_samples);
349
 
350
      // Offset from __target __begin, __length after merging.
351
      _DifferenceType __offset = 0, __length_am = 0;
352
      for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
353
        {
354
          __length_am += (__sd->_M_pieces[__iam][__s]._M_end
355
                          - __sd->_M_pieces[__iam][__s]._M_begin);
356
          __offset += __sd->_M_pieces[__iam][__s]._M_begin;
357
        }
358
 
359
      typedef std::vector<
360
        std::pair<_SortingPlacesIterator, _SortingPlacesIterator> >
361
        _SeqVector;
362
      _SeqVector __seqs(__sd->_M_num_threads);
363
 
364
      for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
365
        {
366
          __seqs[__s] =
367
            std::make_pair(__sd->_M_temporary[__s]
368
                           + __sd->_M_pieces[__iam][__s]._M_begin,
369
                           __sd->_M_temporary[__s]
370
                           + __sd->_M_pieces[__iam][__s]._M_end);
371
        }
372
 
373
      __possibly_stable_multiway_merge<
374
        __stable, typename _SeqVector::iterator,
375
        _RAIter, _Compare, _DifferenceType>()(__seqs.begin(), __seqs.end(),
376
                                     __sd->_M_source + __offset, __comp,
377
                                     __length_am);
378
 
379
#     pragma omp barrier
380
 
381
      for (_DifferenceType __i = 0; __i < __length_local; ++__i)
382
        __sd->_M_temporary[__iam][__i].~_ValueType();
383
      ::operator delete(__sd->_M_temporary[__iam]);
384
    }
385
 
386
  /** @brief PMWMS main call.
387
   *  @param __begin Begin iterator of sequence.
388
   *  @param __end End iterator of sequence.
389
   *  @param __comp Comparator.
390
   *  @param __num_threads Number of threads to use.
391
   */
392
  template<bool __stable, bool __exact, typename _RAIter,
393
           typename _Compare>
394
    void
395
    parallel_sort_mwms(_RAIter __begin, _RAIter __end,
396
                       _Compare __comp,
397
                       _ThreadIndex __num_threads)
398
    {
399
      _GLIBCXX_CALL(__end - __begin)
400
 
401
      typedef std::iterator_traits<_RAIter> _TraitsType;
402
      typedef typename _TraitsType::value_type _ValueType;
403
      typedef typename _TraitsType::difference_type _DifferenceType;
404
 
405
      _DifferenceType __n = __end - __begin;
406
 
407
      if (__n <= 1)
408
        return;
409
 
410
      // at least one element per thread
411
      if (__num_threads > __n)
412
        __num_threads = static_cast<_ThreadIndex>(__n);
413
 
414
      // shared variables
415
      _PMWMSSortingData<_RAIter> __sd;
416
      _DifferenceType* __starts;
417
      _DifferenceType __size;
418
 
419
#     pragma omp parallel num_threads(__num_threads)
420
      {
421
        __num_threads = omp_get_num_threads(); //no more threads than requested
422
 
423
#       pragma omp single
424
        {
425
          __sd._M_num_threads = __num_threads;
426
          __sd._M_source = __begin;
427
 
428
          __sd._M_temporary = new _ValueType*[__num_threads];
429
 
430
          if (!__exact)
431
            {
432
              __size =
433
                (_Settings::get().sort_mwms_oversampling * __num_threads - 1)
434
                * __num_threads;
435
              __sd._M_samples = static_cast<_ValueType*>
436
                (::operator new(__size * sizeof(_ValueType)));
437
            }
438
          else
439
            __sd._M_samples = 0;
440
 
441
          __sd._M_offsets = new _DifferenceType[__num_threads - 1];
442
          __sd._M_pieces
443
            = new std::vector<_Piece<_DifferenceType> >[__num_threads];
444
          for (_ThreadIndex __s = 0; __s < __num_threads; ++__s)
445
            __sd._M_pieces[__s].resize(__num_threads);
446
          __starts = __sd._M_starts = new _DifferenceType[__num_threads + 1];
447
 
448
          _DifferenceType __chunk_length = __n / __num_threads;
449
          _DifferenceType __split = __n % __num_threads;
450
          _DifferenceType __pos = 0;
451
          for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
452
            {
453
              __starts[__i] = __pos;
454
              __pos += ((__i < __split)
455
                        ? (__chunk_length + 1) : __chunk_length);
456
            }
457
          __starts[__num_threads] = __pos;
458
        } //single
459
 
460
        // Now sort in parallel.
461
        parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp);
462
      } //parallel
463
 
464
      delete[] __starts;
465
      delete[] __sd._M_temporary;
466
 
467
      if (!__exact)
468
        {
469
          for (_DifferenceType __i = 0; __i < __size; ++__i)
470
            __sd._M_samples[__i].~_ValueType();
471
          ::operator delete(__sd._M_samples);
472
        }
473
 
474
      delete[] __sd._M_offsets;
475
      delete[] __sd._M_pieces;
476
    }
477
 
478
} //namespace __gnu_parallel
479
 
480
#endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.