1 |
786 |
skrzyp |
// This file is part of the uSTL library, an STL implementation.
|
2 |
|
|
//
|
3 |
|
|
// Copyright (c) 2005-2009 by Mike Sharov <msharov@users.sourceforge.net>
|
4 |
|
|
// This file is free software, distributed under the MIT License.
|
5 |
|
|
|
6 |
|
|
#ifndef USTRING_H_1249CB7A098A9010763AAC6D37B133CF
|
7 |
|
|
#define USTRING_H_1249CB7A098A9010763AAC6D37B133CF
|
8 |
|
|
|
9 |
|
|
#include "memblock.h"
|
10 |
|
|
#include "utf8.h"
|
11 |
|
|
#include <stdarg.h> // for va_list, va_start, and va_end (in string::format)
|
12 |
|
|
|
13 |
|
|
namespace ustl {
|
14 |
|
|
|
15 |
|
|
/// \class string ustring.h ustl.h
|
16 |
|
|
/// \ingroup Sequences
|
17 |
|
|
///
|
18 |
|
|
/// \brief STL basic_string<char> equivalent.
|
19 |
|
|
///
|
20 |
|
|
/// An STL container for text string manipulation.
|
21 |
|
|
/// Differences from C++ standard:
|
22 |
|
|
/// - string is a class, not a template. Wide characters are assumed to be
|
23 |
|
|
/// encoded with utf8 at all times except when rendering or editing,
|
24 |
|
|
/// where you would use a utf8 iterator.
|
25 |
|
|
/// - format member function - you can, of course, use an \ref ostringstream,
|
26 |
|
|
/// which also has format functions, but most of the time this way
|
27 |
|
|
/// is more convenient. Because uSTL does not implement locales,
|
28 |
|
|
/// format is the only way to create localized strings.
|
29 |
|
|
/// - const char* cast operator. It is much clearer to use this than having
|
30 |
|
|
/// to type .c_str() every time.
|
31 |
|
|
/// - length returns the number of _characters_, not bytes.
|
32 |
|
|
/// This function is O(N), so use wisely.
|
33 |
|
|
///
|
34 |
|
|
/// An additional note is in order regarding the use of indexes. All indexes
|
35 |
|
|
/// passed in as arguments or returned by find are byte offsets, not character
|
36 |
|
|
/// offsets. Likewise, sizes are specified in bytes, not characters. The
|
37 |
|
|
/// rationale is that there is no way for you to know what is in the string.
|
38 |
|
|
/// There is no way for you to know how many characters are needed to express
|
39 |
|
|
/// one thing or another. The only thing you can do to a localized string is
|
40 |
|
|
/// search for delimiters and modify text between them as opaque blocks. If you
|
41 |
|
|
/// do anything else, you are hardcoding yourself into a locale! So stop it!
|
42 |
|
|
///
|
43 |
|
|
class string : public memblock {
|
44 |
|
|
public:
|
45 |
|
|
typedef char value_type;
|
46 |
|
|
typedef value_type* pointer;
|
47 |
|
|
typedef const value_type* const_pointer;
|
48 |
|
|
typedef wchar_t wvalue_type;
|
49 |
|
|
typedef wvalue_type* wpointer;
|
50 |
|
|
typedef const wvalue_type* const_wpointer;
|
51 |
|
|
typedef pointer iterator;
|
52 |
|
|
typedef const_pointer const_iterator;
|
53 |
|
|
typedef value_type& reference;
|
54 |
|
|
typedef value_type const_reference;
|
55 |
|
|
typedef ::ustl::reverse_iterator<iterator> reverse_iterator;
|
56 |
|
|
typedef ::ustl::reverse_iterator<const_iterator> const_reverse_iterator;
|
57 |
|
|
typedef utf8in_iterator<const_iterator> utf8_iterator;
|
58 |
|
|
static const size_type npos = INT_MAX; ///< Value that means the end of string.
|
59 |
|
|
public:
|
60 |
|
|
inline string (void) : memblock () { relink ("",0); }
|
61 |
|
|
string (const string& s);
|
62 |
|
|
inline string (const string& s, uoff_t o, size_type n);
|
63 |
|
|
inline explicit string (const cmemlink& l);
|
64 |
|
|
string (const_pointer s);
|
65 |
|
|
inline string (const_pointer s, size_type len);
|
66 |
|
|
inline string (const_pointer s1, const_pointer s2);
|
67 |
|
|
explicit string (size_type n, value_type c = 0);
|
68 |
|
|
inline pointer data (void) { return (string::pointer (memblock::data())); }
|
69 |
|
|
inline const_pointer c_str (void) const { return (string::const_pointer (memblock::cdata())); }
|
70 |
|
|
inline size_type max_size (void) const { size_type s (memblock::max_size()); return (s - !!s); }
|
71 |
|
|
inline size_type capacity (void) const { size_type c (memblock::capacity()); return (c - !!c); }
|
72 |
|
|
void resize (size_type n);
|
73 |
|
|
inline void resize (size_type n, value_type c);
|
74 |
|
|
inline void clear (void) { resize (0); }
|
75 |
|
|
inline const_iterator begin (void) const { return (const_iterator (memblock::begin())); }
|
76 |
|
|
inline iterator begin (void) { return (iterator (memblock::begin())); }
|
77 |
|
|
inline const_iterator end (void) const { return (const_iterator (memblock::end())); }
|
78 |
|
|
inline iterator end (void) { return (iterator (memblock::end())); }
|
79 |
|
|
inline const_reverse_iterator rbegin (void) const { return (const_reverse_iterator (end())); }
|
80 |
|
|
inline reverse_iterator rbegin (void) { return (reverse_iterator (end())); }
|
81 |
|
|
inline const_reverse_iterator rend (void) const { return (const_reverse_iterator (begin())); }
|
82 |
|
|
inline reverse_iterator rend (void) { return (reverse_iterator (begin())); }
|
83 |
|
|
inline utf8_iterator utf8_begin (void) const { return (utf8_iterator (begin())); }
|
84 |
|
|
inline utf8_iterator utf8_end (void) const { return (utf8_iterator (end())); }
|
85 |
|
|
inline const_reference at (uoff_t pos) const { assert (pos <= size() && begin()); return (begin()[pos]); }
|
86 |
|
|
inline reference at (uoff_t pos) { assert (pos <= size() && begin()); return (begin()[pos]); }
|
87 |
|
|
inline const_iterator iat (uoff_t pos) const { return (begin() + (__builtin_constant_p(pos) && pos >= npos ? size() : min(pos,size()))); }
|
88 |
|
|
inline iterator iat (uoff_t pos) { return (const_cast<iterator>(const_cast<const string*>(this)->iat(pos))); }
|
89 |
|
|
const_iterator wiat (uoff_t i) const;
|
90 |
|
|
inline iterator wiat (uoff_t i) { return (const_cast<iterator>(const_cast<const string*>(this)->wiat(i))); }
|
91 |
|
|
inline const_reference back (void) const { return (at(size()-1)); }
|
92 |
|
|
inline reference back (void) { return (at(size()-1)); }
|
93 |
|
|
inline size_type length (void) const { return (distance (utf8_begin(), utf8_end())); }
|
94 |
|
|
inline void append (const_iterator i1, const_iterator i2) { append (i1, distance (i1, i2)); }
|
95 |
|
|
void append (const_pointer s, size_type len);
|
96 |
|
|
void append (const_pointer s);
|
97 |
|
|
void append (size_type n, const_reference c);
|
98 |
|
|
inline void append (size_type n, wvalue_type c) { insert (size(), c, n); }
|
99 |
|
|
inline void append (const_wpointer s1, const_wpointer s2) { insert (size(), s1, s2); }
|
100 |
|
|
inline void append (const_wpointer s) { const_wpointer se (s); for (;se&&*se;++se) ; append (s, se); }
|
101 |
|
|
inline void append (const string& s) { append (s.begin(), s.end()); }
|
102 |
|
|
inline void append (const string& s, uoff_t o, size_type n) { append (s.iat(o), s.iat(o+n)); }
|
103 |
|
|
inline void assign (const_iterator i1, const_iterator i2) { assign (i1, distance (i1, i2)); }
|
104 |
|
|
void assign (const_pointer s, size_type len);
|
105 |
|
|
void assign (const_pointer s);
|
106 |
|
|
inline void assign (const_wpointer s1, const_wpointer s2) { clear(); append (s1, s2); }
|
107 |
|
|
inline void assign (const_wpointer s1) { clear(); append (s1); }
|
108 |
|
|
inline void assign (const string& s) { assign (s.begin(), s.end()); }
|
109 |
|
|
inline void assign (const string& s, uoff_t o, size_type n) { assign (s.iat(o), s.iat(o+n)); }
|
110 |
|
|
size_type copyto (pointer p, size_type n, const_iterator start = NULL) const;
|
111 |
|
|
inline int compare (const string& s) const { return (compare (begin(), end(), s.begin(), s.end())); }
|
112 |
|
|
inline int compare (const_pointer s) const { return (compare (begin(), end(), s, s + strlen(s))); }
|
113 |
|
|
static int compare (const_iterator first1, const_iterator last1, const_iterator first2, const_iterator last2);
|
114 |
|
|
inline operator const value_type* (void) const;
|
115 |
|
|
inline operator value_type* (void);
|
116 |
|
|
inline const string& operator= (const string& s) { assign (s.begin(), s.end()); return (*this); }
|
117 |
|
|
inline const string& operator= (const_reference c) { assign (&c, 1); return (*this); }
|
118 |
|
|
inline const string& operator= (const_pointer s) { assign (s); return (*this); }
|
119 |
|
|
inline const string& operator= (const_wpointer s) { assign (s); return (*this); }
|
120 |
|
|
inline const string& operator+= (const string& s) { append (s.begin(), s.size()); return (*this); }
|
121 |
|
|
inline const string& operator+= (const_reference c) { append (1, c); return (*this); }
|
122 |
|
|
inline const string& operator+= (const_pointer s) { append (s); return (*this); }
|
123 |
|
|
inline const string& operator+= (wvalue_type c) { append (1, c); return (*this); }
|
124 |
|
|
inline const string& operator+= (const_wpointer s) { append (s); return (*this); }
|
125 |
|
|
inline string operator+ (const string& s) const;
|
126 |
|
|
inline bool operator== (const string& s) const { return (memblock::operator== (s)); }
|
127 |
|
|
bool operator== (const_pointer s) const;
|
128 |
|
|
inline bool operator== (const_reference c) const { return (size() == 1 && c == at(0)); }
|
129 |
|
|
inline bool operator!= (const string& s) const { return (!operator== (s)); }
|
130 |
|
|
inline bool operator!= (const_pointer s) const { return (!operator== (s)); }
|
131 |
|
|
inline bool operator!= (const_reference c) const { return (!operator== (c)); }
|
132 |
|
|
inline bool operator< (const string& s) const { return (0 > compare (s)); }
|
133 |
|
|
inline bool operator< (const_pointer s) const { return (0 > compare (s)); }
|
134 |
|
|
inline bool operator< (const_reference c) const { return (0 > compare (begin(), end(), &c, &c + 1)); }
|
135 |
|
|
inline bool operator> (const_pointer s) const { return (0 < compare (s)); }
|
136 |
|
|
void insert (const uoff_t ip, wvalue_type c, size_type n = 1);
|
137 |
|
|
void insert (const uoff_t ip, const_wpointer first, const_wpointer last, const size_type n = 1);
|
138 |
|
|
iterator insert (iterator start, const_reference c, size_type n = 1);
|
139 |
|
|
iterator insert (iterator start, const_pointer s, size_type n = 1);
|
140 |
|
|
iterator insert (iterator start, const_pointer first, const_iterator last, size_type n = 1);
|
141 |
|
|
inline void insert (uoff_t ip, const_pointer s, size_type nlen) { insert (iat(ip), s, s + nlen); }
|
142 |
|
|
inline void insert (uoff_t ip, size_type n, value_type c) { insert (iat(ip), c, n); }
|
143 |
|
|
inline void insert (uoff_t ip, const string& s, uoff_t sp, size_type slen) { insert (iat(ip), s.iat(sp), s.iat(sp + slen)); }
|
144 |
|
|
iterator erase (iterator epo, size_type n = 1);
|
145 |
|
|
void erase (uoff_t epo, size_type n = 1);
|
146 |
|
|
inline iterator erase (iterator first, const_iterator last) { return (erase (first, size_type(distance(first,last)))); }
|
147 |
|
|
inline void eraser (uoff_t first, uoff_t last) { erase (iat(first), iat(last)); }
|
148 |
|
|
inline void push_back (const_reference c) { append (1, c); }
|
149 |
|
|
inline void push_back (wvalue_type c) { append (1, c); }
|
150 |
|
|
inline void pop_back (void) { resize (size() - 1); }
|
151 |
|
|
void replace (iterator first, iterator last, const_pointer s);
|
152 |
|
|
void replace (iterator first, iterator last, const_pointer i1, const_pointer i2, size_type n = 1);
|
153 |
|
|
inline void replace (iterator first, iterator last, const string& s) { replace (first, last, s.begin(), s.end()); }
|
154 |
|
|
inline void replace (iterator first, iterator last, const_pointer s, size_type slen) { replace (first, last, s, s + slen); }
|
155 |
|
|
inline void replace (iterator first, iterator last, size_type n, value_type c) { replace (first, last, &c, &c + 1, n); }
|
156 |
|
|
inline void replace (uoff_t rp, size_type n, const string& s) { replace (iat(rp), iat(rp + n), s); }
|
157 |
|
|
inline void replace (uoff_t rp, size_type n, const string& s, uoff_t sp, size_type slen) { replace (iat(rp), iat(rp + n), s.iat(sp), s.iat(sp + slen)); }
|
158 |
|
|
inline void replace (uoff_t rp, size_type n, const_pointer s, size_type slen) { replace (iat(rp), iat(rp + n), s, s + slen); }
|
159 |
|
|
inline void replace (uoff_t rp, size_type n, const_pointer s) { replace (iat(rp), iat(rp + n), string(s)); }
|
160 |
|
|
inline void replace (uoff_t rp, size_type n, size_type count, value_type c) { replace (iat(rp), iat(rp + n), count, c); }
|
161 |
|
|
inline string substr (uoff_t o, size_type n = npos) const { return (string (*this, o, n)); }
|
162 |
|
|
uoff_t find (const_reference c, uoff_t pos = 0) const;
|
163 |
|
|
uoff_t find (const string& s, uoff_t pos = 0) const;
|
164 |
|
|
uoff_t rfind (const_reference c, uoff_t pos = npos) const;
|
165 |
|
|
uoff_t rfind (const string& s, uoff_t pos = npos) const;
|
166 |
|
|
uoff_t find_first_of (const string& s, uoff_t pos = 0) const;
|
167 |
|
|
uoff_t find_first_not_of (const string& s, uoff_t pos = 0) const;
|
168 |
|
|
uoff_t find_last_of (const string& s, uoff_t pos = npos) const;
|
169 |
|
|
uoff_t find_last_not_of (const string& s, uoff_t pos = npos) const;
|
170 |
|
|
int vformat (const char* fmt, va_list args);
|
171 |
|
|
int format (const char* fmt, ...) __attribute__((__format__(__printf__, 2, 3)));
|
172 |
|
|
void read (istream&);
|
173 |
|
|
void write (ostream& os) const;
|
174 |
|
|
size_t stream_size (void) const;
|
175 |
|
|
static hashvalue_t hash (const char* f1, const char* l1);
|
176 |
|
|
protected:
|
177 |
|
|
virtual size_type minimumFreeCapacity (void) const throw() __attribute__((const));
|
178 |
|
|
};
|
179 |
|
|
|
180 |
|
|
//----------------------------------------------------------------------
|
181 |
|
|
|
182 |
|
|
/// Assigns itself the value of string \p s
|
183 |
|
|
inline string::string (const cmemlink& s)
|
184 |
|
|
: memblock ()
|
185 |
|
|
{
|
186 |
|
|
assign (const_iterator (s.begin()), s.size());
|
187 |
|
|
}
|
188 |
|
|
|
189 |
|
|
/// Assigns itself a [o,o+n) substring of \p s.
|
190 |
|
|
inline string::string (const string& s, uoff_t o, size_type n)
|
191 |
|
|
: memblock()
|
192 |
|
|
{
|
193 |
|
|
assign (s, o, n);
|
194 |
|
|
}
|
195 |
|
|
|
196 |
|
|
/// Copies the value of \p s of length \p len into itself.
|
197 |
|
|
inline string::string (const_pointer s, size_type len)
|
198 |
|
|
: memblock ()
|
199 |
|
|
{
|
200 |
|
|
assign (s, len);
|
201 |
|
|
}
|
202 |
|
|
|
203 |
|
|
/// Copies into itself the string data between \p s1 and \p s2
|
204 |
|
|
inline string::string (const_pointer s1, const_pointer s2)
|
205 |
|
|
: memblock ()
|
206 |
|
|
{
|
207 |
|
|
assert (s1 <= s2 && "Negative ranges result in memory allocation errors.");
|
208 |
|
|
assign (s1, s2);
|
209 |
|
|
}
|
210 |
|
|
|
211 |
|
|
/// Returns the pointer to the first character.
|
212 |
|
|
inline string::operator const string::value_type* (void) const
|
213 |
|
|
{
|
214 |
|
|
assert ((!end() || !*end()) && "This string is linked to data that is not 0-terminated. This may cause serious security problems. Please assign the data instead of linking.");
|
215 |
|
|
return (begin());
|
216 |
|
|
}
|
217 |
|
|
|
218 |
|
|
/// Returns the pointer to the first character.
|
219 |
|
|
inline string::operator string::value_type* (void)
|
220 |
|
|
{
|
221 |
|
|
assert ((end() && !*end()) && "This string is linked to data that is not 0-terminated. This may cause serious security problems. Please assign the data instead of linking.");
|
222 |
|
|
return (begin());
|
223 |
|
|
}
|
224 |
|
|
|
225 |
|
|
/// Concatenates itself with \p s
|
226 |
|
|
inline string string::operator+ (const string& s) const
|
227 |
|
|
{
|
228 |
|
|
string result (*this);
|
229 |
|
|
result += s;
|
230 |
|
|
return (result);
|
231 |
|
|
}
|
232 |
|
|
|
233 |
|
|
/// Resize to \p n and fill new entries with \p c
|
234 |
|
|
inline void string::resize (size_type n, value_type c)
|
235 |
|
|
{
|
236 |
|
|
const size_type oldn = size();
|
237 |
|
|
resize (n);
|
238 |
|
|
fill_n (iat(oldn), max(ssize_t(n-oldn),0), c);
|
239 |
|
|
}
|
240 |
|
|
|
241 |
|
|
//----------------------------------------------------------------------
|
242 |
|
|
// Operators needed to avoid comparing pointer to pointer
|
243 |
|
|
|
244 |
|
|
#define PTR_STRING_CMP(op, impl) \
|
245 |
|
|
inline bool op (const char* s1, const string& s2) { return impl; }
|
246 |
|
|
PTR_STRING_CMP (operator==, (s2 == s1))
|
247 |
|
|
PTR_STRING_CMP (operator!=, (s2 != s1))
|
248 |
|
|
PTR_STRING_CMP (operator<, (s2 > s1))
|
249 |
|
|
PTR_STRING_CMP (operator<=, (s2 >= s1))
|
250 |
|
|
PTR_STRING_CMP (operator>, (s2 < s1))
|
251 |
|
|
PTR_STRING_CMP (operator>=, (s2 <= s1))
|
252 |
|
|
#undef PTR_STRING_CMP
|
253 |
|
|
|
254 |
|
|
//----------------------------------------------------------------------
|
255 |
|
|
|
256 |
|
|
/// Hash of the character range [first,last); forwards to string::hash.
inline hashvalue_t hash_value (const char* first, const char* last)
{
    return string::hash (first, last);
}
|
258 |
|
|
/// Hash of the zero-terminated string \p v; its length is taken with strlen.
inline hashvalue_t hash_value (const char* v)
{
    const char* const last = v + strlen(v);
    return hash_value (v, last);
}
|
260 |
|
|
|
261 |
|
|
//----------------------------------------------------------------------
|
262 |
|
|
|
263 |
|
|
} // namespace ustl
|
264 |
|
|
|
265 |
|
|
// Specialization for stream alignment
|
266 |
|
|
// NOTE(review): ALIGNOF is not defined in this header. Presumably it declares
// the stream alignment of ustl::string to be that of string::value_type
// (char) -- confirm against the macro's definition.
ALIGNOF (ustl::string, alignof (string::value_type()))
|
267 |
|
|
|
268 |
|
|
#endif
|