| 1 |
786 |
skrzyp |
// This file is part of the uSTL library, an STL implementation.
|
| 2 |
|
|
//
|
| 3 |
|
|
// Copyright (c) 2005-2009 by Mike Sharov <msharov@users.sourceforge.net>
|
| 4 |
|
|
// This file is free software, distributed under the MIT License.
|
| 5 |
|
|
|
| 6 |
|
|
#include "ustring.h"
|
| 7 |
|
|
#include "mistream.h"
|
| 8 |
|
|
#include "mostream.h"
|
| 9 |
|
|
#include "ualgo.h"
|
| 10 |
|
|
#include <stdio.h> // for vsnprintf (in string::format)
|
| 11 |
|
|
|
| 12 |
|
|
namespace ustl {
|
| 13 |
|
|
|
| 14 |
|
|
//----------------------------------------------------------------------
|
| 15 |
|
|
|
| 16 |
|
|
const uoff_t string::npos;
|
| 17 |
|
|
|
| 18 |
|
|
//----------------------------------------------------------------------
|
| 19 |
|
|
|
| 20 |
|
|
/// Assigns itself the value of string \p s
|
| 21 |
|
|
string::string (const string& s)
|
| 22 |
|
|
: memblock ((s.size()+1) & (s.is_linked()-1)) // Allocate with terminator if not linked (can't call virtuals from base ctor)
|
| 23 |
|
|
{
|
| 24 |
|
|
if (s.is_linked())
|
| 25 |
|
|
relink (s.c_str(), s.size());
|
| 26 |
|
|
else {
|
| 27 |
|
|
copy_n (s.begin(), size(), begin());
|
| 28 |
|
|
relink (begin(), size()-1); // --m_Size
|
| 29 |
|
|
}
|
| 30 |
|
|
}
|
| 31 |
|
|
|
| 32 |
|
|
/// Links to \p s
|
| 33 |
|
|
string::string (const_pointer s)
|
| 34 |
|
|
: memblock ()
|
| 35 |
|
|
{
|
| 36 |
|
|
if (!s) s = "";
|
| 37 |
|
|
relink (s, strlen(s));
|
| 38 |
|
|
}
|
| 39 |
|
|
|
| 40 |
|
|
/// Creates a string of length \p n filled with character \p c.
|
| 41 |
|
|
string::string (size_type n, value_type c)
|
| 42 |
|
|
: memblock (n+1) // because base ctor can't call virtuals of this class
|
| 43 |
|
|
{
|
| 44 |
|
|
relink (begin(), size()-1); // --m_Size
|
| 45 |
|
|
fill_n (begin(), n, c);
|
| 46 |
|
|
at(n) = 0;
|
| 47 |
|
|
}
|
| 48 |
|
|
|
| 49 |
|
|
/// Resize the string to \p n characters. New space contents is undefined.
|
| 50 |
|
|
void string::resize (size_type n)
|
| 51 |
|
|
{
|
| 52 |
|
|
if (!(n | memblock::capacity()))
|
| 53 |
|
|
return (relink ("",0));
|
| 54 |
|
|
memblock::resize (n);
|
| 55 |
|
|
at(n) = 0;
|
| 56 |
|
|
}
|
| 57 |
|
|
|
| 58 |
|
|
/// Assigns itself the value of string \p s
|
| 59 |
|
|
void string::assign (const_pointer s)
|
| 60 |
|
|
{
|
| 61 |
|
|
if (!s) s = "";
|
| 62 |
|
|
assign (s, strlen (s));
|
| 63 |
|
|
}
|
| 64 |
|
|
|
| 65 |
|
|
/// Assigns itself the value of string \p s of length \p len.
|
| 66 |
|
|
void string::assign (const_pointer s, size_type len)
|
| 67 |
|
|
{
|
| 68 |
|
|
while (len && s[len - 1] == 0)
|
| 69 |
|
|
-- len;
|
| 70 |
|
|
resize (len);
|
| 71 |
|
|
copy (s, len);
|
| 72 |
|
|
}
|
| 73 |
|
|
|
| 74 |
|
|
/// Appends to itself the value of string \p s of length \p len.
|
| 75 |
|
|
void string::append (const_pointer s)
|
| 76 |
|
|
{
|
| 77 |
|
|
if (!s) s = "";
|
| 78 |
|
|
append (s, strlen (s));
|
| 79 |
|
|
}
|
| 80 |
|
|
|
| 81 |
|
|
/// Appends to itself the value of string \p s of length \p len.
|
| 82 |
|
|
void string::append (const_pointer s, size_type len)
|
| 83 |
|
|
{
|
| 84 |
|
|
while (len && s[len - 1] == 0)
|
| 85 |
|
|
-- len;
|
| 86 |
|
|
resize (size() + len);
|
| 87 |
|
|
copy_n (s, len, end() - len);
|
| 88 |
|
|
}
|
| 89 |
|
|
|
| 90 |
|
|
/// Appends to itself \p n characters of value \p c.
|
| 91 |
|
|
void string::append (size_type n, value_type c)
|
| 92 |
|
|
{
|
| 93 |
|
|
resize (size() + n);
|
| 94 |
|
|
fill_n (end() - n, n, c);
|
| 95 |
|
|
}
|
| 96 |
|
|
|
| 97 |
|
|
/// Copies into itself at offset \p start, the value of string \p p of length \p n.
|
| 98 |
|
|
string::size_type string::copyto (pointer p, size_type n, const_iterator start) const
|
| 99 |
|
|
{
|
| 100 |
|
|
assert (p && n);
|
| 101 |
|
|
if (!start)
|
| 102 |
|
|
start = begin();
|
| 103 |
|
|
const size_type btc = min(n-1, size());
|
| 104 |
|
|
copy_n (start, btc, p);
|
| 105 |
|
|
p[btc] = 0;
|
| 106 |
|
|
return (btc+1);
|
| 107 |
|
|
}
|
| 108 |
|
|
|
| 109 |
|
|
/// Returns comparison value regarding string \p s.
|
| 110 |
|
|
/// The return value is:
|
| 111 |
|
|
/// \li 1 if this string is greater (by value, not length) than string \p s
|
| 112 |
|
|
/// \li 0 if this string is equal to string \p s
|
| 113 |
|
|
/// \li -1 if this string is less than string \p s
|
| 114 |
|
|
///
|
| 115 |
|
|
/*static*/ int string::compare (const_iterator first1, const_iterator last1, const_iterator first2, const_iterator last2)
|
| 116 |
|
|
{
|
| 117 |
|
|
assert (first1 <= last1 && (first2 <= last2 || !last2) && "Negative ranges result in memory allocation errors.");
|
| 118 |
|
|
const size_type len1 = distance (first1, last1), len2 = distance (first2, last2);
|
| 119 |
|
|
const int rvbylen = sign (int(len1 - len2));
|
| 120 |
|
|
int rv = memcmp (first1, first2, min (len1, len2));
|
| 121 |
|
|
return (rv ? rv : rvbylen);
|
| 122 |
|
|
}
|
| 123 |
|
|
|
| 124 |
|
|
/// Returns true if this string is equal to string \p s.
|
| 125 |
|
|
bool string::operator== (const_pointer s) const
|
| 126 |
|
|
{
|
| 127 |
|
|
if (!s) s = "";
|
| 128 |
|
|
return (size() == strlen(s) && 0 == memcmp (c_str(), s, size()));
|
| 129 |
|
|
}
|
| 130 |
|
|
|
| 131 |
|
|
/// Returns the beginning of character \p i.
|
| 132 |
|
|
string::const_iterator string::wiat (uoff_t i) const
|
| 133 |
|
|
{
|
| 134 |
|
|
utf8in_iterator<string::const_iterator> cfinder (begin());
|
| 135 |
|
|
cfinder += i;
|
| 136 |
|
|
return (cfinder.base());
|
| 137 |
|
|
}
|
| 138 |
|
|
|
| 139 |
|
|
/// Inserts wide character \p c at \p ipo \p n times as a UTF-8 string.
|
| 140 |
|
|
///
|
| 141 |
|
|
/// \p ipo is a byte position, not a character position, and is intended
|
| 142 |
|
|
/// to be obtained from one of the find functions. Generally you are not
|
| 143 |
|
|
/// able to know the character position in a localized string; different
|
| 144 |
|
|
/// languages will have different character counts, so use find instead.
|
| 145 |
|
|
///
|
| 146 |
|
|
void string::insert (const uoff_t ipo, wchar_t c, size_type n)
|
| 147 |
|
|
{
|
| 148 |
|
|
iterator ip (iat(ipo));
|
| 149 |
|
|
ip = iterator (memblock::insert (memblock::iterator(ip), n * Utf8Bytes(c)));
|
| 150 |
|
|
fill_n (utf8out (ip), n, c);
|
| 151 |
|
|
*end() = 0;
|
| 152 |
|
|
}
|
| 153 |
|
|
|
| 154 |
|
|
/// Inserts sequence of wide characters at \p ipo (byte position from a find call)
|
| 155 |
|
|
void string::insert (const uoff_t ipo, const wchar_t* first, const wchar_t* last, const size_type n)
|
| 156 |
|
|
{
|
| 157 |
|
|
iterator ip (iat(ipo));
|
| 158 |
|
|
size_type nti = distance (first, last), bti = 0;
|
| 159 |
|
|
for (uoff_t i = 0; i < nti; ++ i)
|
| 160 |
|
|
bti += Utf8Bytes(first[i]);
|
| 161 |
|
|
ip = iterator (memblock::insert (memblock::iterator(ip), n * bti));
|
| 162 |
|
|
utf8out_iterator<string::iterator> uout (utf8out (ip));
|
| 163 |
|
|
for (uoff_t j = 0; j < n; ++ j)
|
| 164 |
|
|
for (uoff_t k = 0; k < nti; ++ k, ++ uout)
|
| 165 |
|
|
*uout = first[k];
|
| 166 |
|
|
*end() = 0;
|
| 167 |
|
|
}
|
| 168 |
|
|
|
| 169 |
|
|
/// Inserts character \p c into this string at \p start.
|
| 170 |
|
|
string::iterator string::insert (iterator start, const_reference c, size_type n)
|
| 171 |
|
|
{
|
| 172 |
|
|
start = iterator (memblock::insert (memblock::iterator(start), n));
|
| 173 |
|
|
fill_n (start, n, c);
|
| 174 |
|
|
*end() = 0;
|
| 175 |
|
|
return (start);
|
| 176 |
|
|
}
|
| 177 |
|
|
|
| 178 |
|
|
/// Inserts \p count instances of string \p s at offset \p start.
|
| 179 |
|
|
string::iterator string::insert (iterator start, const_pointer s, size_type n)
|
| 180 |
|
|
{
|
| 181 |
|
|
if (!s) s = "";
|
| 182 |
|
|
return (insert (start, s, s + strlen(s), n));
|
| 183 |
|
|
}
|
| 184 |
|
|
|
| 185 |
|
|
/// Inserts [first,last] \p n times.
|
| 186 |
|
|
string::iterator string::insert (iterator start, const_pointer first, const_pointer last, size_type n)
|
| 187 |
|
|
{
|
| 188 |
|
|
assert (first <= last);
|
| 189 |
|
|
assert (begin() <= start && end() >= start);
|
| 190 |
|
|
assert ((first < begin() || first >= end() || size() + abs_distance(first,last) < capacity()) && "Insertion of self with autoresize is not supported");
|
| 191 |
|
|
start = iterator (memblock::insert (memblock::iterator(start), distance(first, last) * n));
|
| 192 |
|
|
fill (memblock::iterator(start), first, distance(first, last), n);
|
| 193 |
|
|
*end() = 0;
|
| 194 |
|
|
return (start);
|
| 195 |
|
|
}
|
| 196 |
|
|
|
| 197 |
|
|
/// Erases \p size bytes at \p ep.
|
| 198 |
|
|
string::iterator string::erase (iterator ep, size_type n)
|
| 199 |
|
|
{
|
| 200 |
|
|
string::iterator rv = memblock::erase (memblock::iterator(ep), n);
|
| 201 |
|
|
*end() = 0;
|
| 202 |
|
|
return (rv);
|
| 203 |
|
|
}
|
| 204 |
|
|
|
| 205 |
|
|
/// Erases \p n bytes at byte offset \p epo.
|
| 206 |
|
|
void string::erase (uoff_t epo, size_type n)
|
| 207 |
|
|
{
|
| 208 |
|
|
erase (iat(epo), n);
|
| 209 |
|
|
}
|
| 210 |
|
|
|
| 211 |
|
|
/// Replaces range [\p start, \p start + \p len] with string \p s.
|
| 212 |
|
|
void string::replace (iterator first, iterator last, const_pointer s)
|
| 213 |
|
|
{
|
| 214 |
|
|
if (!s) s = "";
|
| 215 |
|
|
replace (first, last, s, s + strlen(s));
|
| 216 |
|
|
}
|
| 217 |
|
|
|
| 218 |
|
|
/// Replaces range [\p start, \p start + \p len] with \p count instances of string \p s.
|
| 219 |
|
|
void string::replace (iterator first, iterator last, const_pointer i1, const_pointer i2, size_type n)
|
| 220 |
|
|
{
|
| 221 |
|
|
assert (first <= last);
|
| 222 |
|
|
assert (n || distance(first, last));
|
| 223 |
|
|
assert (first >= begin() && first <= end() && last >= first && last <= end());
|
| 224 |
|
|
assert ((i1 < begin() || i1 >= end() || abs_distance(i1,i2) * n + size() < capacity()) && "Replacement by self can not autoresize");
|
| 225 |
|
|
const size_type bte = distance(first, last), bti = distance(i1, i2) * n;
|
| 226 |
|
|
if (bti < bte)
|
| 227 |
|
|
first = iterator (memblock::erase (memblock::iterator(first), bte - bti));
|
| 228 |
|
|
else if (bte < bti)
|
| 229 |
|
|
first = iterator (memblock::insert (memblock::iterator(first), bti - bte));
|
| 230 |
|
|
fill (memblock::iterator(first), i1, distance(i1, i2), n);
|
| 231 |
|
|
*end() = 0;
|
| 232 |
|
|
}
|
| 233 |
|
|
|
| 234 |
|
|
/// Returns the offset of the first occurence of \p c after \p pos.
|
| 235 |
|
|
uoff_t string::find (const_reference c, uoff_t pos) const
|
| 236 |
|
|
{
|
| 237 |
|
|
const_iterator found = ::ustl::find (iat(pos), end(), c);
|
| 238 |
|
|
return (found < end() ? distance(begin(),found) : npos);
|
| 239 |
|
|
}
|
| 240 |
|
|
|
| 241 |
|
|
/// Returns the offset of the first occurence of substring \p s of length \p n after \p pos.
|
| 242 |
|
|
uoff_t string::find (const string& s, uoff_t pos) const
|
| 243 |
|
|
{
|
| 244 |
|
|
if (s.empty() || s.size() > size() - pos)
|
| 245 |
|
|
return (npos);
|
| 246 |
|
|
const uoff_t endi = s.size() - 1;
|
| 247 |
|
|
const_reference endchar = s[endi];
|
| 248 |
|
|
uoff_t lastPos = endi;
|
| 249 |
|
|
while (lastPos-- && s[lastPos] != endchar) ;
|
| 250 |
|
|
const size_type skip = endi - lastPos;
|
| 251 |
|
|
const_iterator i = iat(pos) + endi;
|
| 252 |
|
|
for (; i < end() && (i = ::ustl::find (i, end(), endchar)) < end(); i += skip)
|
| 253 |
|
|
if (memcmp (i - endi, s.c_str(), s.size()) == 0)
|
| 254 |
|
|
return (distance (begin(), i) - endi);
|
| 255 |
|
|
return (npos);
|
| 256 |
|
|
}
|
| 257 |
|
|
|
| 258 |
|
|
/// Returns the offset of the last occurence of character \p c before \p pos.
|
| 259 |
|
|
uoff_t string::rfind (const_reference c, uoff_t pos) const
|
| 260 |
|
|
{
|
| 261 |
|
|
for (int i = min(pos,size()-1); i >= 0; --i)
|
| 262 |
|
|
if (at(i) == c)
|
| 263 |
|
|
return (i);
|
| 264 |
|
|
return (npos);
|
| 265 |
|
|
}
|
| 266 |
|
|
|
| 267 |
|
|
/// Returns the offset of the last occurence of substring \p s of size \p n before \p pos.
|
| 268 |
|
|
uoff_t string::rfind (const string& s, uoff_t pos) const
|
| 269 |
|
|
{
|
| 270 |
|
|
const_iterator d = iat(pos) - 1;
|
| 271 |
|
|
const_iterator sp = begin() + s.size() - 1;
|
| 272 |
|
|
const_iterator m = s.end() - 1;
|
| 273 |
|
|
for (long int i = 0; d > sp && size_type(i) < s.size(); -- d)
|
| 274 |
|
|
for (i = 0; size_type(i) < s.size(); ++ i)
|
| 275 |
|
|
if (m[-i] != d[-i])
|
| 276 |
|
|
break;
|
| 277 |
|
|
return (d > sp ? distance (begin(), d + 2 - s.size()) : npos);
|
| 278 |
|
|
}
|
| 279 |
|
|
|
| 280 |
|
|
/// Returns the offset of the first occurence of one of characters in \p s of size \p n after \p pos.
|
| 281 |
|
|
uoff_t string::find_first_of (const string& s, uoff_t pos) const
|
| 282 |
|
|
{
|
| 283 |
|
|
for (uoff_t i = min(pos,size()); i < size(); ++ i)
|
| 284 |
|
|
if (s.find (at(i)) != npos)
|
| 285 |
|
|
return (i);
|
| 286 |
|
|
return (npos);
|
| 287 |
|
|
}
|
| 288 |
|
|
|
| 289 |
|
|
/// Returns the offset of the first occurence of one of characters not in \p s of size \p n after \p pos.
|
| 290 |
|
|
uoff_t string::find_first_not_of (const string& s, uoff_t pos) const
|
| 291 |
|
|
{
|
| 292 |
|
|
for (uoff_t i = min(pos,size()); i < size(); ++ i)
|
| 293 |
|
|
if (s.find (at(i)) == npos)
|
| 294 |
|
|
return (i);
|
| 295 |
|
|
return (npos);
|
| 296 |
|
|
}
|
| 297 |
|
|
|
| 298 |
|
|
/// Returns the offset of the last occurence of one of characters in \p s of size \p n before \p pos.
|
| 299 |
|
|
uoff_t string::find_last_of (const string& s, uoff_t pos) const
|
| 300 |
|
|
{
|
| 301 |
|
|
for (int i = min(pos,size()-1); i >= 0; -- i)
|
| 302 |
|
|
if (s.find (at(i)) != npos)
|
| 303 |
|
|
return (i);
|
| 304 |
|
|
return (npos);
|
| 305 |
|
|
}
|
| 306 |
|
|
|
| 307 |
|
|
/// Returns the offset of the last occurence of one of characters not in \p s of size \p n before \p pos.
|
| 308 |
|
|
uoff_t string::find_last_not_of (const string& s, uoff_t pos) const
|
| 309 |
|
|
{
|
| 310 |
|
|
for (int i = min(pos,size()-1); i >= 0; -- i)
|
| 311 |
|
|
if (s.find (at(i)) == npos)
|
| 312 |
|
|
return (i);
|
| 313 |
|
|
return (npos);
|
| 314 |
|
|
}
|
| 315 |
|
|
|
| 316 |
|
|
/// Equivalent to a vsprintf on the string.
|
| 317 |
|
|
int string::vformat (const char* fmt, va_list args)
|
| 318 |
|
|
{
|
| 319 |
|
|
#if HAVE_VA_COPY
|
| 320 |
|
|
va_list args2;
|
| 321 |
|
|
#else
|
| 322 |
|
|
#define args2 args
|
| 323 |
|
|
#undef __va_copy
|
| 324 |
|
|
#define __va_copy(x,y)
|
| 325 |
|
|
#endif
|
| 326 |
|
|
size_t rv = size();
|
| 327 |
|
|
do {
|
| 328 |
|
|
reserve (rv);
|
| 329 |
|
|
__va_copy (args2, args);
|
| 330 |
|
|
rv = vsnprintf (data(), memblock::capacity(), fmt, args2);
|
| 331 |
|
|
rv = min (rv, memblock::capacity());
|
| 332 |
|
|
} while (rv > capacity());
|
| 333 |
|
|
resize (min (rv, capacity()));
|
| 334 |
|
|
return (rv);
|
| 335 |
|
|
}
|
| 336 |
|
|
|
| 337 |
|
|
/// Equivalent to a sprintf on the string.
|
| 338 |
|
|
int string::format (const char* fmt, ...)
|
| 339 |
|
|
{
|
| 340 |
|
|
va_list args;
|
| 341 |
|
|
va_start (args, fmt);
|
| 342 |
|
|
const int rv = vformat (fmt, args);
|
| 343 |
|
|
va_end (args);
|
| 344 |
|
|
return (rv);
|
| 345 |
|
|
}
|
| 346 |
|
|
|
| 347 |
|
|
/// Returns the number of bytes required to write this object to a stream.
|
| 348 |
|
|
size_t string::stream_size (void) const
|
| 349 |
|
|
{
|
| 350 |
|
|
return (Utf8Bytes(size()) + size());
|
| 351 |
|
|
}
|
| 352 |
|
|
|
| 353 |
|
|
/// Reads the object from stream \p os
|
| 354 |
|
|
void string::read (istream& is)
|
| 355 |
|
|
{
|
| 356 |
|
|
char szbuf [8];
|
| 357 |
|
|
is >> szbuf[0];
|
| 358 |
|
|
size_t szsz (Utf8SequenceBytes (szbuf[0]) - 1), n = 0;
|
| 359 |
|
|
if (!is.verify_remaining ("read", "ustl::string", szsz)) return;
|
| 360 |
|
|
is.read (szbuf + 1, szsz);
|
| 361 |
|
|
n = *utf8in(szbuf);
|
| 362 |
|
|
if (!is.verify_remaining ("read", "ustl::string", n)) return;
|
| 363 |
|
|
resize (n);
|
| 364 |
|
|
is.read (data(), size());
|
| 365 |
|
|
}
|
| 366 |
|
|
|
| 367 |
|
|
/// Writes the object to stream \p os
|
| 368 |
|
|
void string::write (ostream& os) const
|
| 369 |
|
|
{
|
| 370 |
|
|
const written_size_type sz (size());
|
| 371 |
|
|
assert (sz == size() && "No support for writing strings larger than 4G");
|
| 372 |
|
|
|
| 373 |
|
|
char szbuf [8];
|
| 374 |
|
|
utf8out_iterator<char*> szout (szbuf);
|
| 375 |
|
|
*szout = sz;
|
| 376 |
|
|
size_t szsz = distance (szbuf, szout.base());
|
| 377 |
|
|
|
| 378 |
|
|
if (!os.verify_remaining ("write", "ustl::string", szsz + sz)) return;
|
| 379 |
|
|
os.write (szbuf, szsz);
|
| 380 |
|
|
os.write (cdata(), sz);
|
| 381 |
|
|
}
|
| 382 |
|
|
|
| 383 |
|
|
/// Returns a hash value for [first, last)
|
| 384 |
|
|
/*static*/ hashvalue_t string::hash (const char* first, const char* last)
|
| 385 |
|
|
{
|
| 386 |
|
|
hashvalue_t h = 0;
|
| 387 |
|
|
// This has the bits flowing into each other from both sides of the number
|
| 388 |
|
|
for (; first < last; ++ first)
|
| 389 |
|
|
h = *first + ((h << 7) | (h >> (BitsInType(hashvalue_t) - 7)));
|
| 390 |
|
|
return (h);
|
| 391 |
|
|
}
|
| 392 |
|
|
|
| 393 |
|
|
string::size_type string::minimumFreeCapacity (void) const throw() { return (1); }
|
| 394 |
|
|
|
| 395 |
|
|
} // namespace ustl
|