Added the checked iterator, function prior that replaces previous, and updated the html documentation
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@66 a809a056-fc17-0410-9590-b4f493f8b08e
This commit is contained in:
parent
d2ee7164b6
commit
e4dc80dae3
5 changed files with 1549 additions and 953 deletions
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
@ -29,7 +29,7 @@ DEALINGS IN THE SOFTWARE.
|
||||||
#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
|
#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
|
||||||
|
|
||||||
#include "core.h"
|
#include "core.h"
|
||||||
#include <exception>
|
#include <stdexcept>
|
||||||
|
|
||||||
namespace utf8
|
namespace utf8
|
||||||
{
|
{
|
||||||
|
@ -152,7 +152,18 @@ namespace utf8
|
||||||
return cp;
|
return cp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename octet_iterator>
|
||||||
|
uint32_t prior(octet_iterator& it, octet_iterator start)
|
||||||
|
{
|
||||||
|
octet_iterator end = it;
|
||||||
|
while (internal::is_trail(*(--it)))
|
||||||
|
if (it < start)
|
||||||
|
throw invalid_utf8(*it); // error - no lead byte in the sequence
|
||||||
|
octet_iterator temp = it;
|
||||||
|
return next(temp, end);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Deprecated in versions that include "prior"
|
||||||
template <typename octet_iterator>
|
template <typename octet_iterator>
|
||||||
uint32_t previous(octet_iterator& it, octet_iterator pass_start)
|
uint32_t previous(octet_iterator& it, octet_iterator pass_start)
|
||||||
{
|
{
|
||||||
|
@ -240,37 +251,50 @@ namespace utf8
|
||||||
// The iterator class
|
// The iterator class
|
||||||
template <typename octet_iterator>
|
template <typename octet_iterator>
|
||||||
class iterator {
|
class iterator {
|
||||||
static const typename std::iterator_traits<octet_iterator>::difference_type MAX_UTF8_SEQUENCE_LENGTH = 4;
|
|
||||||
octet_iterator it;
|
octet_iterator it;
|
||||||
|
octet_iterator range_start;
|
||||||
|
octet_iterator range_end;
|
||||||
public:
|
public:
|
||||||
explicit iterator (const octet_iterator& octet_it) : it(octet_it) {}
|
explicit iterator (const octet_iterator& octet_it,
|
||||||
|
const octet_iterator& range_start,
|
||||||
|
const octet_iterator& range_end) :
|
||||||
|
it(octet_it), range_start(range_start), range_end(range_end)
|
||||||
|
{
|
||||||
|
if (it < range_start || it > range_end)
|
||||||
|
throw std::out_of_range("Invalid utf-8 iterator position");
|
||||||
|
}
|
||||||
// the default "big three" are OK
|
// the default "big three" are OK
|
||||||
uint32_t operator * () const
|
uint32_t operator * () const
|
||||||
{
|
{
|
||||||
octet_iterator temp = it;
|
octet_iterator temp = it;
|
||||||
return next(temp, temp + MAX_UTF8_SEQUENCE_LENGTH);
|
return next(temp, range_end);
|
||||||
|
}
|
||||||
|
bool operator == (const iterator& rhs) const
|
||||||
|
{
|
||||||
|
if (range_start != rhs.range_start && range_end != rhs.range_end)
|
||||||
|
throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
|
||||||
|
return (it == rhs.it);
|
||||||
}
|
}
|
||||||
bool operator == (const iterator& rhs) const { return (it == rhs.it); }
|
|
||||||
iterator& operator ++ ()
|
iterator& operator ++ ()
|
||||||
{
|
{
|
||||||
next(it, it + MAX_UTF8_SEQUENCE_LENGTH);
|
next(it, range_end);
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
iterator operator ++ (int)
|
iterator operator ++ (int)
|
||||||
{
|
{
|
||||||
iterator temp = *this;
|
iterator temp = *this;
|
||||||
next(it, it + MAX_UTF8_SEQUENCE_LENGTH);
|
next(it, range_end);
|
||||||
return temp;
|
return temp;
|
||||||
}
|
}
|
||||||
iterator& operator -- ()
|
iterator& operator -- ()
|
||||||
{
|
{
|
||||||
previous(it, it - MAX_UTF8_SEQUENCE_LENGTH);
|
prior(it, range_start);
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
iterator operator -- (int)
|
iterator operator -- (int)
|
||||||
{
|
{
|
||||||
iterator temp = *this;
|
iterator temp = *this;
|
||||||
previous(it, it - MAX_UTF8_SEQUENCE_LENGTH);
|
prior(it, range_start);
|
||||||
return temp;
|
return temp;
|
||||||
}
|
}
|
||||||
}; // class iterator
|
}; // class iterator
|
||||||
|
|
|
@ -88,13 +88,20 @@ namespace utf8
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename octet_iterator>
|
template <typename octet_iterator>
|
||||||
uint32_t previous(octet_iterator& it)
|
uint32_t prior(octet_iterator& it)
|
||||||
{
|
{
|
||||||
while (internal::is_trail(*(--it))) ;
|
while (internal::is_trail(*(--it))) ;
|
||||||
octet_iterator temp = it;
|
octet_iterator temp = it;
|
||||||
return next(temp);
|
return next(temp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Deprecated in versions that include prior, but only for the sake of consistency (see utf8::previous)
|
||||||
|
template <typename octet_iterator>
|
||||||
|
inline uint32_t previous(octet_iterator& it)
|
||||||
|
{
|
||||||
|
return prior(it);
|
||||||
|
}
|
||||||
|
|
||||||
template <typename octet_iterator, typename distance_type>
|
template <typename octet_iterator, typename distance_type>
|
||||||
void advance (octet_iterator& it, distance_type n)
|
void advance (octet_iterator& it, distance_type n)
|
||||||
{
|
{
|
||||||
|
|
|
@ -22,6 +22,8 @@ int main()
|
||||||
end = append(0x10346, u);
|
end = append(0x10346, u);
|
||||||
assert (u[0] == 0xf0 && u[1] == 0x90 && u[2] == 0x8d && u[3] == 0x86 && u[4] == 0);
|
assert (u[0] == 0xf0 && u[1] == 0x90 && u[2] == 0x8d && u[3] == 0x86 && u[4] == 0);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//next
|
//next
|
||||||
char* twochars = "\xe6\x97\xa5\xd1\x88";
|
char* twochars = "\xe6\x97\xa5\xd1\x88";
|
||||||
char* w = twochars;
|
char* w = twochars;
|
||||||
|
@ -41,8 +43,24 @@ int main()
|
||||||
assert (cp == 0x0448);
|
assert (cp == 0x0448);
|
||||||
assert (w == threechars + 9);
|
assert (w == threechars + 9);
|
||||||
|
|
||||||
|
//prior
|
||||||
|
w = twochars + 3;
|
||||||
|
cp = prior (w, twochars);
|
||||||
|
assert (cp == 0x65e5);
|
||||||
|
assert (w == twochars);
|
||||||
|
|
||||||
//previous
|
w = threechars + 9;
|
||||||
|
cp = prior(w, threechars);
|
||||||
|
assert (cp == 0x0448);
|
||||||
|
assert (w == threechars + 7);
|
||||||
|
cp = prior(w, threechars);
|
||||||
|
assert (cp == 0x65e5);
|
||||||
|
assert (w == threechars + 4);
|
||||||
|
cp = prior(w, threechars);
|
||||||
|
assert (cp == 0x10346);
|
||||||
|
assert (w == threechars);
|
||||||
|
|
||||||
|
//previous (deprecated)
|
||||||
w = twochars + 3;
|
w = twochars + 3;
|
||||||
cp = previous (w, twochars - 1);
|
cp = previous (w, twochars - 1);
|
||||||
assert (cp == 0x65e5);
|
assert (cp == 0x65e5);
|
||||||
|
@ -131,19 +149,19 @@ int main()
|
||||||
assert (std::equal(replace_invalid_result.begin(), replace_invalid_result.end(), fixed_invalid_sequence));
|
assert (std::equal(replace_invalid_result.begin(), replace_invalid_result.end(), fixed_invalid_sequence));
|
||||||
|
|
||||||
// iterator
|
// iterator
|
||||||
utf8::iterator<char*> it(threechars);
|
utf8::iterator<char*> it(threechars, threechars, threechars + 9);
|
||||||
utf8::iterator<char*> it2 = it;
|
utf8::iterator<char*> it2 = it;
|
||||||
assert (it2 == it);
|
assert (it2 == it);
|
||||||
assert (*it == 0x10346);
|
assert (*it == 0x10346);
|
||||||
assert (*(++it) == 0x65e5);
|
assert (*(++it) == 0x65e5);
|
||||||
assert ((*it++) == 0x65e5);
|
assert ((*it++) == 0x65e5);
|
||||||
assert (*it == 0x0448);
|
assert (*it == 0x0448);
|
||||||
utf8::iterator<char*> endit (threechars + 9);
|
utf8::iterator<char*> endit (threechars + 9, threechars, threechars + 9);
|
||||||
assert (++it == endit);
|
assert (++it == endit);
|
||||||
assert (*(--it) == 0x0448);
|
assert (*(--it) == 0x0448);
|
||||||
assert ((*it--) == 0x0448);
|
assert ((*it--) == 0x0448);
|
||||||
assert (*it == 0x65e5);
|
assert (*it == 0x65e5);
|
||||||
assert (--it == utf8::iterator<char*>(threechars));
|
assert (--it == utf8::iterator<char*>(threechars, threechars, threechars + 9));
|
||||||
assert (*it == 0x10346);
|
assert (*it == 0x10346);
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////
|
||||||
|
@ -179,7 +197,8 @@ int main()
|
||||||
assert (w == threechars + 9);
|
assert (w == threechars + 9);
|
||||||
|
|
||||||
|
|
||||||
//previous
|
//previous (calls prior internally)
|
||||||
|
|
||||||
w = twochars + 3;
|
w = twochars + 3;
|
||||||
cp = unchecked::previous (w);
|
cp = unchecked::previous (w);
|
||||||
assert (cp == 0x65e5);
|
assert (cp == 0x65e5);
|
||||||
|
|
Loading…
Add table
Reference in a new issue