1 |
786 |
skrzyp |
// This file is part of the uSTL library, an STL implementation.
|
2 |
|
|
//
|
3 |
|
|
// Copyright (c) 2005-2009 by Mike Sharov <msharov@users.sourceforge.net>
|
4 |
|
|
// This file is free software, distributed under the MIT License.
|
5 |
|
|
//
|
6 |
|
|
/// \file uutility.h
|
7 |
|
|
/// \brief Utility templates.
|
8 |
|
|
|
9 |
|
|
#ifndef UUTILITY_H_6A58BD296269A82A4AAAA4FD19FDB3AC
|
10 |
|
|
#define UUTILITY_H_6A58BD296269A82A4AAAA4FD19FDB3AC
|
11 |
|
|
|
12 |
|
|
#include "utypes.h"
|
13 |
|
|
#include "traits.h"
|
14 |
|
|
#include "ulimits.h"
|
15 |
|
|
#include <assert.h>
|
16 |
|
|
|
17 |
|
|
namespace ustl {
|
18 |
|
|
|
19 |
|
|
#ifdef __GNUC__
|
20 |
|
|
/// Returns the number of elements in a static vector
|
21 |
|
|
#define VectorSize(v) (sizeof(v) / sizeof(*v))
|
22 |
|
|
#else
|
23 |
|
|
// Old compilers will not be able to evaluate *v on an empty vector.
|
24 |
|
|
// The tradeoff here is that VectorSize will not be able to measure arrays of local structs.
|
25 |
|
|
#define VectorSize(v) (sizeof(v) / ustl::size_of_elements(1, v))
|
26 |
|
|
#endif
|
27 |
|
|
|
28 |
|
|
/// Expands into a ptr,size expression for the given static vector; useful as link arguments.
|
29 |
|
|
#define VectorBlock(v) (v)+0, VectorSize(v) // +0 makes it work under gcc 2.95
|
30 |
|
|
/// Expands into a begin,end expression for the given static vector; useful for algorithm arguments.
|
31 |
|
|
#define VectorRange(v) VectorBlock(v)+(v)
|
32 |
|
|
|
33 |
|
|
/// Indexes into a static array with bounds limit
|
34 |
|
|
#define VectorElement(v,i) v[min(uoff_t(i),uoff_t(VectorSize(v)-1))]
|
35 |
|
|
|
36 |
|
|
/// Returns the number of bits in the given type
|
37 |
|
|
#define BitsInType(t) (sizeof(t) * CHAR_BIT)
|
38 |
|
|
|
39 |
|
|
/// Returns the mask of type \p t with the lowest \p n bits set.
|
40 |
|
|
#define BitMask(t,n) (t(~t(0)) >> ((sizeof(t) * CHAR_BIT) - (n)))
|
41 |
|
|
|
42 |
|
|
/// Argument that is used only in debug builds (as in an assert)
|
43 |
|
|
#ifndef NDEBUG
|
44 |
|
|
#define DebugArg(x) x
|
45 |
|
|
#else
|
46 |
|
|
#define DebugArg(x)
|
47 |
|
|
#endif
|
48 |
|
|
|
49 |
|
|
/// Shorthand for container iteration.
|
50 |
|
|
#define foreach(type,i,ctr) for (type i = (ctr).begin(); i != (ctr).end(); ++ i)
|
51 |
|
|
/// Shorthand for container reverse iteration.
|
52 |
|
|
#define eachfor(type,i,ctr) for (type i = (ctr).rbegin(); i != (ctr).rend(); ++ i)
|
53 |
|
|
|
54 |
|
|
#ifndef DOXYGEN_SHOULD_SKIP_THIS
|
55 |
|
|
// Macro for passing template types as macro arguments.
|
56 |
|
|
// These are deprecated. Use metamac macros instead. Will remove by next release.
|
57 |
|
|
#define TEMPLATE_FULL_DECL1(d1,t1) template <d1 t1>
|
58 |
|
|
#define TEMPLATE_FULL_DECL2(d1,t1,d2,t2) template <d1 t1, d2 t2>
|
59 |
|
|
#define TEMPLATE_FULL_DECL3(d1,t1,d2,t2,d3,t3) template <d1 t1, d2 t2, d3 t3>
|
60 |
|
|
#define TEMPLATE_DECL1(t1) TEMPLATE_FULL_DECL1(typename,t1)
|
61 |
|
|
#define TEMPLATE_DECL2(t1,t2) TEMPLATE_FULL_DECL2(typename,t1,typename,t2)
|
62 |
|
|
#define TEMPLATE_DECL3(t1,t2,t3) TEMPLATE_FULL_DECL3(typename,t1,typename,t2,typename,t3)
|
63 |
|
|
#define TEMPLATE_TYPE1(type,a1) type<a1>
|
64 |
|
|
#define TEMPLATE_TYPE2(type,a1,a2) type<a1,a2>
|
65 |
|
|
#define TEMPLATE_TYPE3(type,a1,a2,a3) type<a1,a2,a3>
|
66 |
|
|
#endif
|
67 |
|
|
|
68 |
|
|
/// Returns the minimum of \p a and \p b
|
69 |
|
|
template <typename T1, typename T2>
|
70 |
|
|
inline T1 min (const T1& a, const T2& b)
|
71 |
|
|
{
|
72 |
|
|
return (a < b ? a : b);
|
73 |
|
|
}
|
74 |
|
|
|
75 |
|
|
/// Returns the maximum of \p a and \p b
|
76 |
|
|
template <typename T1, typename T2>
|
77 |
|
|
inline T1 max (const T1& a, const T2& b)
|
78 |
|
|
{
|
79 |
|
|
return (b < a ? a : b);
|
80 |
|
|
}
|
81 |
|
|
|
82 |
|
|
/// The alignment performed by default.
|
83 |
|
|
const size_t c_DefaultAlignment = __alignof__(void*);
|
84 |
|
|
|
85 |
|
|
/// \brief Rounds \p n up to be divisible by \p grain
|
86 |
|
|
template <typename T>
|
87 |
|
|
inline T Align (T n, size_t grain = c_DefaultAlignment)
|
88 |
|
|
{
|
89 |
|
|
n += grain - 1;
|
90 |
|
|
return (n - n % grain);
|
91 |
|
|
}
|
92 |
|
|
|
93 |
|
|
/// Returns a NULL pointer cast to T.
|
94 |
|
|
template <typename T>
|
95 |
|
|
inline T* NullPointer (void)
|
96 |
|
|
{ return ((T*) NULL); }
|
97 |
|
|
|
98 |
|
|
/// \brief Returns a non-dereferentiable value reference.
|
99 |
|
|
/// This is useful for passing to alignof or the like which need a value but
|
100 |
|
|
/// don't need to actually use it.
|
101 |
|
|
template <typename T>
|
102 |
|
|
inline T& NullValue (void)
|
103 |
|
|
{ return (*NullPointer<T>()); }
|
104 |
|
|
|
105 |
|
|
/// Offsets an iterator
|
106 |
|
|
template <typename T>
|
107 |
|
|
inline T advance (T i, ssize_t offset)
|
108 |
|
|
{
|
109 |
|
|
return (i + offset);
|
110 |
|
|
}
|
111 |
|
|
|
112 |
|
|
#ifndef DOXYGEN_SHOULD_SKIP_THIS
|
113 |
|
|
/// Offsets a void pointer
|
114 |
|
|
template <>
|
115 |
|
|
inline const void* advance (const void* p, ssize_t offset)
|
116 |
|
|
{
|
117 |
|
|
assert (p || !offset);
|
118 |
|
|
return (reinterpret_cast<const uint8_t*>(p) + offset);
|
119 |
|
|
}
|
120 |
|
|
|
121 |
|
|
/// Offsets a void pointer
|
122 |
|
|
template <>
|
123 |
|
|
inline void* advance (void* p, ssize_t offset)
|
124 |
|
|
{
|
125 |
|
|
assert (p || !offset);
|
126 |
|
|
return (reinterpret_cast<uint8_t*>(p) + offset);
|
127 |
|
|
}
|
128 |
|
|
#endif
|
129 |
|
|
|
130 |
|
|
/// Returns the difference \p p1 - \p p2
|
131 |
|
|
template <typename T1, typename T2>
|
132 |
|
|
inline ptrdiff_t distance (T1 i1, T2 i2)
|
133 |
|
|
{
|
134 |
|
|
return (i2 - i1);
|
135 |
|
|
}
|
136 |
|
|
|
137 |
|
|
#ifndef DOXYGEN_SHOULD_SKIP_THIS
|
138 |
|
|
#define UNVOID_DISTANCE(T1const,T2const) \
|
139 |
|
|
template <> inline ptrdiff_t distance (T1const void* p1, T2const void* p2) \
|
140 |
|
|
{ return ((T2const uint8_t*)(p2) - (T1const uint8_t*)(p1)); }
|
141 |
|
|
UNVOID_DISTANCE(,)
|
142 |
|
|
UNVOID_DISTANCE(const,const)
|
143 |
|
|
UNVOID_DISTANCE(,const)
|
144 |
|
|
UNVOID_DISTANCE(const,)
|
145 |
|
|
#undef UNVOID_DISTANCE
|
146 |
|
|
#endif
|
147 |
|
|
|
148 |
|
|
#ifndef DOXYGEN_SHOULD_SKIP_THIS
|
149 |
|
|
// The compiler issues a warning if an unsigned type is compared to 0.
|
150 |
|
|
template <typename T, bool IsSigned> struct __is_negative { inline bool operator()(const T& v) { return (v < 0); } };
|
151 |
|
|
template <typename T> struct __is_negative<T,false> { inline bool operator()(const T&) { return (false); } };
|
152 |
|
|
/// Warning-free way to check if \p v is negative, even if for unsigned types.
|
153 |
|
|
template <typename T> inline bool is_negative (const T& v) { return (__is_negative<T,numeric_limits<T>::is_signed>()(v)); }
|
154 |
|
|
#endif
|
155 |
|
|
|
156 |
|
|
/// \brief Returns the absolute value of \p v
|
157 |
|
|
/// Unlike the stdlib functions, this is inline and works with all types.
|
158 |
|
|
template <typename T>
|
159 |
|
|
inline T absv (T v)
|
160 |
|
|
{
|
161 |
|
|
return (is_negative(v) ? -v : v);
|
162 |
|
|
}
|
163 |
|
|
|
164 |
|
|
/// \brief Returns -1 for negative values, 1 for positive, and 0 for 0
|
165 |
|
|
template <typename T>
|
166 |
|
|
inline T sign (T v)
|
167 |
|
|
{
|
168 |
|
|
return ((0 < v) - is_negative(v));
|
169 |
|
|
}
|
170 |
|
|
|
171 |
|
|
/// Returns the absolute value of the distance i1 and i2
|
172 |
|
|
template <typename T1, typename T2>
|
173 |
|
|
inline size_t abs_distance (T1 i1, T2 i2)
|
174 |
|
|
{
|
175 |
|
|
return (absv (distance(i1, i2)));
|
176 |
|
|
}
|
177 |
|
|
|
178 |
|
|
/// Returns the size of \p n elements of size \p T
|
179 |
|
|
template <typename T>
|
180 |
|
|
inline size_t size_of_elements (size_t n, const T*)
|
181 |
|
|
{
|
182 |
|
|
return (n * sizeof(T));
|
183 |
|
|
}
|
184 |
|
|
|
185 |
|
|
// Defined in byteswap.h, which is usually unusable.
|
186 |
|
|
#undef bswap_16
|
187 |
|
|
#undef bswap_32
|
188 |
|
|
#undef bswap_64
|
189 |
|
|
|
190 |
|
|
inline uint16_t bswap_16 (uint16_t v)
|
191 |
|
|
{
|
192 |
|
|
#if CPU_HAS_CMPXCHG8 // If it has that, it has bswap.
|
193 |
|
|
if (!__builtin_constant_p(v)) asm ("rorw $8, %0":"+r"(v)); else
|
194 |
|
|
#endif
|
195 |
|
|
v = v << 8 | v >> 8;
|
196 |
|
|
return (v);
|
197 |
|
|
}
|
198 |
|
|
inline uint32_t bswap_32 (uint32_t v)
|
199 |
|
|
{
|
200 |
|
|
#if CPU_HAS_CMPXCHG8
|
201 |
|
|
if (!__builtin_constant_p(v)) asm ("bswap %0":"+r"(v)); else
|
202 |
|
|
#endif
|
203 |
|
|
v = v << 24 | (v & 0xFF00) << 8 | ((v >> 8) & 0xFF00) | v >> 24;
|
204 |
|
|
return (v);
|
205 |
|
|
}
|
206 |
|
|
#if HAVE_INT64_T
|
207 |
|
|
inline uint64_t bswap_64 (uint64_t v)
|
208 |
|
|
{
|
209 |
|
|
#if x86_64
|
210 |
|
|
if (!__builtin_constant_p(v)) asm ("bswap %0":"+r"(v)); else
|
211 |
|
|
#endif
|
212 |
|
|
v = (uint64_t(bswap_32(v)) << 32) | bswap_32(v >> 32);
|
213 |
|
|
return (v);
|
214 |
|
|
}
|
215 |
|
|
#endif
|
216 |
|
|
|
217 |
|
|
/// \brief Swaps the byteorder of \p v.
|
218 |
|
|
template <typename T>
|
219 |
|
|
inline T bswap (const T& v)
|
220 |
|
|
{
|
221 |
|
|
switch (BitsInType(T)) {
|
222 |
|
|
default: return (v);
|
223 |
|
|
case 16: return (T (bswap_16 (uint16_t (v))));
|
224 |
|
|
case 32: return (T (bswap_32 (uint32_t (v))));
|
225 |
|
|
#if HAVE_INT64_T
|
226 |
|
|
case 64: return (T (bswap_64 (uint64_t (v))));
|
227 |
|
|
#endif
|
228 |
|
|
}
|
229 |
|
|
}
|
230 |
|
|
|
231 |
|
|
#if BYTE_ORDER == BIG_ENDIAN
|
232 |
|
|
template <typename T> inline T le_to_native (const T& v) { return (bswap (v)); }
|
233 |
|
|
template <typename T> inline T be_to_native (const T& v) { return (v); }
|
234 |
|
|
template <typename T> inline T native_to_le (const T& v) { return (bswap (v)); }
|
235 |
|
|
template <typename T> inline T native_to_be (const T& v) { return (v); }
|
236 |
|
|
#elif BYTE_ORDER == LITTLE_ENDIAN
|
237 |
|
|
template <typename T> inline T le_to_native (const T& v) { return (v); }
|
238 |
|
|
template <typename T> inline T be_to_native (const T& v) { return (bswap (v)); }
|
239 |
|
|
template <typename T> inline T native_to_le (const T& v) { return (v); }
|
240 |
|
|
template <typename T> inline T native_to_be (const T& v) { return (bswap (v)); }
|
241 |
|
|
#endif // BYTE_ORDER
|
242 |
|
|
|
243 |
|
|
/// Deletes \p p and sets it to NULL
|
244 |
|
|
template <typename T>
|
245 |
|
|
inline void Delete (T*& p)
|
246 |
|
|
{
|
247 |
|
|
delete p;
|
248 |
|
|
p = NULL;
|
249 |
|
|
}
|
250 |
|
|
|
251 |
|
|
/// Deletes \p p as an array and sets it to NULL
|
252 |
|
|
template <typename T>
|
253 |
|
|
inline void DeleteVector (T*& p)
|
254 |
|
|
{
|
255 |
|
|
delete [] p;
|
256 |
|
|
p = NULL;
|
257 |
|
|
}
|
258 |
|
|
|
259 |
|
|
/// Template of making != from ! and ==
|
260 |
|
|
template <typename T>
|
261 |
|
|
inline bool operator!= (const T& x, const T& y)
|
262 |
|
|
{
|
263 |
|
|
return (!(x == y));
|
264 |
|
|
}
|
265 |
|
|
|
266 |
|
|
/// Template of making > from <
|
267 |
|
|
template <typename T>
|
268 |
|
|
inline bool operator> (const T& x, const T& y)
|
269 |
|
|
{
|
270 |
|
|
return (y < x);
|
271 |
|
|
}
|
272 |
|
|
|
273 |
|
|
/// Template of making <= from < and ==
|
274 |
|
|
template <typename T>
|
275 |
|
|
inline bool operator<= (const T& x, const T& y)
|
276 |
|
|
{
|
277 |
|
|
return (!(y < x));
|
278 |
|
|
}
|
279 |
|
|
|
280 |
|
|
/// Template of making >= from < and ==
|
281 |
|
|
template <typename T>
|
282 |
|
|
inline bool operator>= (const T& x, const T& y)
|
283 |
|
|
{
|
284 |
|
|
return (!(x < y));
|
285 |
|
|
}
|
286 |
|
|
|
287 |
|
|
/// Packs \p s multiple times into \p b. Useful for loop unrolling.
|
288 |
|
|
template <typename TSmall, typename TBig>
|
289 |
|
|
inline void pack_type (TSmall s, TBig& b)
|
290 |
|
|
{
|
291 |
|
|
const size_t n = sizeof(TBig) / sizeof(TSmall);
|
292 |
|
|
b = s;
|
293 |
|
|
// Calls to min are here to avoid warnings for shifts bigger than the type. min will be gone when optimized.
|
294 |
|
|
if (n < 2) return;
|
295 |
|
|
b = (b << min (BitsInType(TSmall), BitsInType(TBig))) | b;
|
296 |
|
|
if (n < 4) return;
|
297 |
|
|
b = (b << min (BitsInType(TSmall) * 2, BitsInType(TBig))) | b;
|
298 |
|
|
if (n < 8) return;
|
299 |
|
|
b = (b << min (BitsInType(TSmall) * 4, BitsInType(TBig))) | b;
|
300 |
|
|
}
|
301 |
|
|
|
302 |
|
|
/// \brief Divides \p n1 by \p n2 and rounds the result up.
|
303 |
|
|
/// This is in contrast to regular division, which rounds down.
|
304 |
|
|
template <typename T1, typename T2>
|
305 |
|
|
inline T1 DivRU (T1 n1, T2 n2)
|
306 |
|
|
{
|
307 |
|
|
T2 adj = n2 - 1;
|
308 |
|
|
if (is_negative (n1))
|
309 |
|
|
adj = -adj;
|
310 |
|
|
return ((n1 + adj) / n2);
|
311 |
|
|
}
|
312 |
|
|
|
313 |
|
|
#if __GNUC__ >= 3
|
314 |
|
|
inline bool TestAndSet (int* pm) INLINE;
|
315 |
|
|
#endif
|
316 |
|
|
/// Sets the contents of \p pm to 1 and returns true if the previous value was 0.
|
317 |
|
|
inline bool TestAndSet (int* pm)
|
318 |
|
|
{
|
319 |
|
|
#if CPU_HAS_CMPXCHG8
|
320 |
|
|
bool rv;
|
321 |
|
|
int oldVal (1);
|
322 |
|
|
asm volatile ( // cmpxchg compares to %eax and swaps if equal
|
323 |
|
|
"cmpxchgl %3, %1\n\t"
|
324 |
|
|
"sete %0"
|
325 |
|
|
: "=a" (rv), "=m" (*pm), "=r" (oldVal)
|
326 |
|
|
: "2" (oldVal), "a" (0)
|
327 |
|
|
: "memory");
|
328 |
|
|
return (rv);
|
329 |
|
|
#elif __i386__ || __x86_64__
|
330 |
|
|
int oldVal (1);
|
331 |
|
|
asm volatile ("xchgl %0, %1" : "=r"(oldVal), "=m"(*pm) : "0"(oldVal), "m"(*pm) : "memory");
|
332 |
|
|
return (!oldVal);
|
333 |
|
|
#elif __sparc32__ // This has not been tested
|
334 |
|
|
int rv;
|
335 |
|
|
asm volatile ("ldstub %1, %0" : "=r"(rv), "=m"(*pm) : "m"(pm));
|
336 |
|
|
return (!rv);
|
337 |
|
|
#else
|
338 |
|
|
const int oldVal (*pm);
|
339 |
|
|
*pm = 1;
|
340 |
|
|
return (!oldVal);
|
341 |
|
|
#endif
|
342 |
|
|
}
|
343 |
|
|
|
344 |
|
|
inline uint32_t NextPow2 (uint32_t v)
|
345 |
|
|
{
|
346 |
|
|
#if __i386__ || __x86_64__
|
347 |
|
|
asm("dec\t%1\n\t"
|
348 |
|
|
"mov\t$1,%0\n\t"
|
349 |
|
|
"bsr\t%1,%1\n\t"
|
350 |
|
|
"inc\t%1\n\t"
|
351 |
|
|
"rol\t%b1,%0":"=&r"(v):"c"(v));
|
352 |
|
|
return (v);
|
353 |
|
|
#else
|
354 |
|
|
// The following code is sub-optimal but mimics the x86 implementation
|
355 |
|
|
int i = 31;
|
356 |
|
|
v--;
|
357 |
|
|
while (!(v & (1 << i)) && i > 0) i--;
|
358 |
|
|
if (i == 31)
|
359 |
|
|
return 1;
|
360 |
|
|
return (1 << (i + 1));
|
361 |
|
|
#endif
|
362 |
|
|
}
|
363 |
|
|
|
364 |
|
|
/// \brief This template is to be used for dereferencing a type-punned pointer without a warning.
|
365 |
|
|
///
|
366 |
|
|
/// When casting a local variable to an unrelated type through a pointer (for
|
367 |
|
|
/// example, casting a float to a uint32_t without conversion), the resulting
|
368 |
|
|
/// memory location can be accessed through either pointer, which violates the
|
369 |
|
|
/// strict aliasing rule. While -fno-strict-aliasing option can be given to
|
370 |
|
|
/// the compiler, eliminating this warning, inefficient code may result in
|
371 |
|
|
/// some instances, because aliasing inhibits some optimizations. By using
|
372 |
|
|
/// this template, and by ensuring the memory is accessed in one way only,
|
373 |
|
|
/// efficient code can be produced without the warning. For gcc 4.1.0+.
|
374 |
|
|
///
|
375 |
|
|
template <typename DEST, typename SRC>
|
376 |
|
|
inline DEST noalias (const DEST&, SRC* s)
|
377 |
|
|
{
|
378 |
|
|
asm("":"+g"(s)::"memory");
|
379 |
|
|
union UPun { SRC s; DEST d; };
|
380 |
|
|
return (((UPun*)(s))->d);
|
381 |
|
|
}
|
382 |
|
|
|
383 |
|
|
template <typename DEST, typename SRC>
|
384 |
|
|
inline DEST noalias_cast (SRC s)
|
385 |
|
|
{
|
386 |
|
|
asm("":"+g"(s)::"memory");
|
387 |
|
|
union { SRC s; DEST d; } u = {s};
|
388 |
|
|
return (u.d);
|
389 |
|
|
}
|
390 |
|
|
|
391 |
|
|
namespace simd {
|
392 |
|
|
/// Call after you are done using SIMD algorithms for 64 bit tuples.
|
393 |
|
|
inline void reset_mmx (void) INLINE;
|
394 |
|
|
#define ALL_MMX_REGS_CHANGELIST "mm0","mm1","mm2","mm3","mm4","mm5","mm6","mm7","st","st(1)","st(2)","st(3)","st(4)","st(5)","st(6)","st(7)"
|
395 |
|
|
#if CPU_HAS_3DNOW
|
396 |
|
|
inline void reset_mmx (void) { asm ("femms":::ALL_MMX_REGS_CHANGELIST); }
|
397 |
|
|
#elif CPU_HAS_MMX
|
398 |
|
|
inline void reset_mmx (void) { asm ("emms":::ALL_MMX_REGS_CHANGELIST); }
|
399 |
|
|
#else
|
400 |
|
|
inline void reset_mmx (void) {}
|
401 |
|
|
#endif
|
402 |
|
|
} // namespace simd
|
403 |
|
|
} // namespace ustl
|
404 |
|
|
|
405 |
|
|
#endif
|