OpenCores
URL https://opencores.org/ocsvn/altor32/altor32/trunk

Subversion Repositories altor32

[/] [altor32/] [trunk/] [gcc-x64/] [or1knd-elf/] [or1knd-elf/] [include/] [c++/] [4.8.0/] [parallel/] [settings.h] - Blame information for rev 35

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 35 ultra_embe
// -*- C++ -*-
2
 
3
// Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
4
//
5
// This file is part of the GNU ISO C++ Library.  This library is free
6
// software; you can redistribute it and/or modify it under the terms
7
// of the GNU General Public License as published by the Free Software
8
// Foundation; either version 3, or (at your option) any later
9
// version.
10
 
11
// This library is distributed in the hope that it will be useful, but
12
// WITHOUT ANY WARRANTY; without even the implied warranty of
13
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
// General Public License for more details.
15
 
16
// Under Section 7 of GPL version 3, you are granted additional
17
// permissions described in the GCC Runtime Library Exception, version
18
// 3.1, as published by the Free Software Foundation.
19
 
20
// You should have received a copy of the GNU General Public License and
21
// a copy of the GCC Runtime Library Exception along with this program;
22
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23
// <http://www.gnu.org/licenses/>.
24
 
25
/** @file parallel/settings.h
26
 *  @brief Runtime settings and tuning parameters, heuristics to decide
27
 *  whether to use parallelized algorithms.
28
 *  This file is a GNU parallel extension to the Standard C++ Library.
29
 *
30
 *  @section parallelization_decision
31
 *  The decision whether to run an algorithm in parallel.
32
 *
33
 *  There are several ways the user can switch on and __off the parallel
34
 *  execution of an algorithm, both at compile- and run-time.
35
 *
36
 *  Only sequential execution can be forced at compile-time.  This
37
 *  reduces code size and protects code parts that have
38
 *  non-thread-safe side effects.
39
 *
40
 *  Ultimately, forcing parallel execution at compile-time makes
41
 *  sense.  Often, the sequential algorithm implementation is used as
42
 *  a subroutine, so no reduction in code size can be achieved.  Also,
43
 *  the machine the program is run on might have only one processor
44
 *  core, so to avoid overhead, the algorithm is executed
45
 *  sequentially.
46
 *
47
 *  To force sequential execution of an algorithm ultimately at
48
 *  compile-time, the user must add the tag
49
*  gnu_parallel::sequential_tag() to the end of the parameter list,
50
 *  e. g.
51
 *
52
 *  \code
53
 *  std::sort(__v.begin(), __v.end(), __gnu_parallel::sequential_tag());
54
 *  \endcode
55
 *
56
 *  This is compatible with all overloaded algorithm variants.  No
57
 *  additional code will be instantiated, at all.  The same holds for
58
 *  most algorithm calls with iterators not providing random access.
59
 *
60
 *  If the algorithm call is not forced to be executed sequentially
61
 *  at compile-time, the decision is made at run-time.
62
 *  The global variable __gnu_parallel::_Settings::algorithm_strategy
63
 *  is checked. _It is a tristate variable corresponding to:
64
 *
65
 *  a. force_sequential, meaning the sequential algorithm is executed.
66
*  b. force_parallel, meaning the parallel algorithm is executed.
67
*  c. heuristic
68
 *
69
 *  For heuristic, the parallel algorithm implementation is called
70
 *  only if the input size is sufficiently large.  For most
71
 *  algorithms, the input size is the (combined) length of the input
72
*  sequence(__s).  The threshold can be set by the user, individually
73
 *  for each algorithm.  The according variables are called
74
*  gnu_parallel::_Settings::[algorithm]_minimal_n .
75
 *
76
 *  For some of the algorithms, there are even more tuning options,
77
 *  e. g. the ability to choose from multiple algorithm variants.  See
78
 *  below for details.
79
 */
80
 
81
// Written by Johannes Singler and Felix Putze.
82
 
83
#ifndef _GLIBCXX_PARALLEL_SETTINGS_H
84
#define _GLIBCXX_PARALLEL_SETTINGS_H 1
85
 
86
#include <parallel/types.h>
87
 
88
/**
89
  * @brief Determine at compile(?)-time if the parallel variant of an
90
  * algorithm should be called.
91
  * @param __c A condition that is convertible to bool that is overruled by
92
  * __gnu_parallel::_Settings::algorithm_strategy. Usually a decision
93
  * based on the input size.
94
  */
95
#define _GLIBCXX_PARALLEL_CONDITION(__c) \
96
  (__gnu_parallel::_Settings::get().algorithm_strategy \
97
    != __gnu_parallel::force_sequential \
98
  && ((__gnu_parallel::__get_max_threads() > 1 && (__c)) \
99
     || __gnu_parallel::_Settings::get().algorithm_strategy \
100
        == __gnu_parallel::force_parallel))
101
 
102
/*
103
inline bool
104
parallel_condition(bool __c)
105
{
106
  bool ret = false;
107
  const _Settings& __s = _Settings::get();
108
  if (__s.algorithm_strategy != force_seqential)
109
    {
110
      if (__s.algorithm_strategy == force_parallel)
111
        ret = true;
112
      else
113
        ret = __get_max_threads() > 1 && __c;
114
    }
115
  return ret;
116
}
117
*/
118
 
119
namespace __gnu_parallel
120
{
121
  /// class _Settings
122
  /// Run-time settings for the parallel mode including all tunable parameters.
123
  struct _Settings
124
  {
125
    _AlgorithmStrategy          algorithm_strategy;
126
 
127
    _SortAlgorithm              sort_algorithm;
128
    _PartialSumAlgorithm        partial_sum_algorithm;
129
    _MultiwayMergeAlgorithm     multiway_merge_algorithm;
130
    _FindAlgorithm              find_algorithm;
131
 
132
    _SplittingAlgorithm         sort_splitting;
133
    _SplittingAlgorithm         merge_splitting;
134
    _SplittingAlgorithm         multiway_merge_splitting;
135
 
136
    // Per-algorithm settings.
137
 
138
    /// Minimal input size for accumulate.
139
    _SequenceIndex              accumulate_minimal_n;
140
 
141
    /// Minimal input size for adjacent_difference.
142
    unsigned int                adjacent_difference_minimal_n;
143
 
144
    /// Minimal input size for count and count_if.
145
    _SequenceIndex              count_minimal_n;
146
 
147
    /// Minimal input size for fill.
148
    _SequenceIndex              fill_minimal_n;
149
 
150
    /// Block size increase factor for find.
151
    double                      find_increasing_factor;
152
 
153
    /// Initial block size for find.
154
    _SequenceIndex              find_initial_block_size;
155
 
156
    /// Maximal block size for find.
157
    _SequenceIndex              find_maximum_block_size;
158
 
159
    /// Start with looking for this many elements sequentially, for find.
160
    _SequenceIndex              find_sequential_search_size;
161
 
162
    /// Minimal input size for for_each.
163
    _SequenceIndex              for_each_minimal_n;
164
 
165
    /// Minimal input size for generate.
166
    _SequenceIndex              generate_minimal_n;
167
 
168
    /// Minimal input size for max_element.
169
    _SequenceIndex              max_element_minimal_n;
170
 
171
    /// Minimal input size for merge.
172
    _SequenceIndex              merge_minimal_n;
173
 
174
    /// Oversampling factor for merge.
175
    unsigned int                merge_oversampling;
176
 
177
    /// Minimal input size for min_element.
178
    _SequenceIndex              min_element_minimal_n;
179
 
180
    /// Minimal input size for multiway_merge.
181
    _SequenceIndex              multiway_merge_minimal_n;
182
 
183
    /// Oversampling factor for multiway_merge.
184
    int                         multiway_merge_minimal_k;
185
 
186
    /// Oversampling factor for multiway_merge.
187
    unsigned int                multiway_merge_oversampling;
188
 
189
    /// Minimal input size for nth_element.
190
    _SequenceIndex              nth_element_minimal_n;
191
 
192
    /// Chunk size for partition.
193
    _SequenceIndex              partition_chunk_size;
194
 
195
    /// Chunk size for partition, relative to input size.  If > 0.0,
196
    /// this value overrides partition_chunk_size.
197
    double                      partition_chunk_share;
198
 
199
    /// Minimal input size for partition.
200
    _SequenceIndex              partition_minimal_n;
201
 
202
    /// Minimal input size for partial_sort.
203
    _SequenceIndex              partial_sort_minimal_n;
204
 
205
    /// Ratio for partial_sum. Assume "sum and write result" to be
206
    /// this factor slower than just "sum".
207
    float                       partial_sum_dilation;
208
 
209
    /// Minimal input size for partial_sum.
210
    unsigned int                partial_sum_minimal_n;
211
 
212
    /// Minimal input size for random_shuffle.
213
    unsigned int                random_shuffle_minimal_n;
214
 
215
    /// Minimal input size for replace and replace_if.
216
    _SequenceIndex              replace_minimal_n;
217
 
218
    /// Minimal input size for set_difference.
219
    _SequenceIndex              set_difference_minimal_n;
220
 
221
    /// Minimal input size for set_intersection.
222
    _SequenceIndex              set_intersection_minimal_n;
223
 
224
    /// Minimal input size for set_symmetric_difference.
225
    _SequenceIndex              set_symmetric_difference_minimal_n;
226
 
227
    /// Minimal input size for set_union.
228
    _SequenceIndex              set_union_minimal_n;
229
 
230
    /// Minimal input size for parallel sorting.
231
    _SequenceIndex              sort_minimal_n;
232
 
233
    /// Oversampling factor for parallel std::sort (MWMS).
234
    unsigned int                sort_mwms_oversampling;
235
 
236
    /// Such many samples to take to find a good pivot (quicksort).
237
    unsigned int                sort_qs_num_samples_preset;
238
 
239
    /// Maximal subsequence __length to switch to unbalanced __base case.
240
    /// Applies to std::sort with dynamically load-balanced quicksort.
241
    _SequenceIndex              sort_qsb_base_case_maximal_n;
242
 
243
    /// Minimal input size for parallel std::transform.
244
    _SequenceIndex              transform_minimal_n;
245
 
246
    /// Minimal input size for unique_copy. 
247
    _SequenceIndex              unique_copy_minimal_n;
248
 
249
    _SequenceIndex              workstealing_chunk_size;
250
 
251
    // Hardware dependent tuning parameters.
252
 
253
    /// size of the L1 cache in bytes (underestimation).
254
    unsigned long long          L1_cache_size;
255
 
256
    /// size of the L2 cache in bytes (underestimation).
257
    unsigned long long          L2_cache_size;
258
 
259
    /// size of the Translation Lookaside Buffer (underestimation).
260
    unsigned int                TLB_size;
261
 
262
    /// Overestimation of cache line size.  Used to avoid false
263
    /// sharing, i.e. elements of different threads are at least this
264
    /// amount apart.
265
    unsigned int                cache_line_size;
266
 
267
    // Statistics.
268
 
269
    /// The number of stolen ranges in load-balanced quicksort.
270
    _SequenceIndex              qsb_steals;
271
 
272
    /// Minimal input size for search and search_n.
273
    _SequenceIndex              search_minimal_n;
274
 
275
    /// Block size scale-down factor with respect to current position.
276
    float                       find_scale_factor;
277
 
278
    /// Get the global settings.
279
    _GLIBCXX_CONST static const _Settings&
280
    get() throw();
281
 
282
    /// Set the global settings.
283
    static void
284
    set(_Settings&) throw();
285
 
286
    explicit
287
    _Settings() :
288
            algorithm_strategy(heuristic),
289
            sort_algorithm(MWMS),
290
            partial_sum_algorithm(LINEAR),
291
            multiway_merge_algorithm(LOSER_TREE),
292
            find_algorithm(CONSTANT_SIZE_BLOCKS),
293
            sort_splitting(EXACT),
294
            merge_splitting(EXACT),
295
            multiway_merge_splitting(EXACT),
296
            accumulate_minimal_n(1000),
297
            adjacent_difference_minimal_n(1000),
298
            count_minimal_n(1000),
299
            fill_minimal_n(1000),
300
            find_increasing_factor(2.0),
301
            find_initial_block_size(256),
302
            find_maximum_block_size(8192),
303
            find_sequential_search_size(256),
304
            for_each_minimal_n(1000),
305
            generate_minimal_n(1000),
306
            max_element_minimal_n(1000),
307
            merge_minimal_n(1000),
308
            merge_oversampling(10),
309
            min_element_minimal_n(1000),
310
            multiway_merge_minimal_n(1000),
311
            multiway_merge_minimal_k(2), multiway_merge_oversampling(10),
312
            nth_element_minimal_n(1000),
313
            partition_chunk_size(1000),
314
            partition_chunk_share(0.0),
315
            partition_minimal_n(1000),
316
            partial_sort_minimal_n(1000),
317
            partial_sum_dilation(1.0f),
318
            partial_sum_minimal_n(1000),
319
            random_shuffle_minimal_n(1000),
320
            replace_minimal_n(1000),
321
            set_difference_minimal_n(1000),
322
            set_intersection_minimal_n(1000),
323
            set_symmetric_difference_minimal_n(1000),
324
            set_union_minimal_n(1000),
325
            sort_minimal_n(1000),
326
            sort_mwms_oversampling(10),
327
            sort_qs_num_samples_preset(100),
328
            sort_qsb_base_case_maximal_n(100),
329
            transform_minimal_n(1000),
330
            unique_copy_minimal_n(10000),
331
            workstealing_chunk_size(100),
332
            L1_cache_size(16 << 10),
333
            L2_cache_size(256 << 10),
334
            TLB_size(128),
335
            cache_line_size(64),
336
            qsb_steals(0),
337
            search_minimal_n(1000),
338
            find_scale_factor(0.01f)
339
    { }
340
  };
341
}
342
 
343
#endif /* _GLIBCXX_PARALLEL_SETTINGS_H */

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.