Added the checked iterator and the function prior, which replaces previous; updated the HTML documentation
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@66 a809a056-fc17-0410-9590-b4f493f8b08e
This commit is contained in:
parent
d2ee7164b6
commit
e4dc80dae3
5 changed files with 1549 additions and 953 deletions
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
@ -29,7 +29,7 @@ DEALINGS IN THE SOFTWARE.
|
|||
#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
|
||||
|
||||
#include "core.h"
|
||||
#include <exception>
|
||||
#include <stdexcept>
|
||||
|
||||
namespace utf8
|
||||
{
|
||||
|
@ -152,7 +152,18 @@ namespace utf8
|
|||
return cp;
|
||||
}
|
||||
|
||||
template <typename octet_iterator>
|
||||
uint32_t prior(octet_iterator& it, octet_iterator start)
|
||||
{
|
||||
octet_iterator end = it;
|
||||
while (internal::is_trail(*(--it)))
|
||||
if (it < start)
|
||||
throw invalid_utf8(*it); // error - no lead byte in the sequence
|
||||
octet_iterator temp = it;
|
||||
return next(temp, end);
|
||||
}
|
||||
|
||||
/// Deprecated in versions that include "prior"
|
||||
template <typename octet_iterator>
|
||||
uint32_t previous(octet_iterator& it, octet_iterator pass_start)
|
||||
{
|
||||
|
@ -240,37 +251,50 @@ namespace utf8
|
|||
// The iterator class
|
||||
template <typename octet_iterator>
|
||||
class iterator {
|
||||
static const typename std::iterator_traits<octet_iterator>::difference_type MAX_UTF8_SEQUENCE_LENGTH = 4;
|
||||
octet_iterator it;
|
||||
octet_iterator range_start;
|
||||
octet_iterator range_end;
|
||||
public:
|
||||
explicit iterator (const octet_iterator& octet_it) : it(octet_it) {}
|
||||
explicit iterator (const octet_iterator& octet_it,
|
||||
const octet_iterator& range_start,
|
||||
const octet_iterator& range_end) :
|
||||
it(octet_it), range_start(range_start), range_end(range_end)
|
||||
{
|
||||
if (it < range_start || it > range_end)
|
||||
throw std::out_of_range("Invalid utf-8 iterator position");
|
||||
}
|
||||
// the default "big three" are OK
|
||||
uint32_t operator * () const
|
||||
{
|
||||
octet_iterator temp = it;
|
||||
return next(temp, temp + MAX_UTF8_SEQUENCE_LENGTH);
|
||||
return next(temp, range_end);
|
||||
}
|
||||
bool operator == (const iterator& rhs) const
|
||||
{
|
||||
if (range_start != rhs.range_start && range_end != rhs.range_end)
|
||||
throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
|
||||
return (it == rhs.it);
|
||||
}
|
||||
bool operator == (const iterator& rhs) const { return (it == rhs.it); }
|
||||
iterator& operator ++ ()
|
||||
{
|
||||
next(it, it + MAX_UTF8_SEQUENCE_LENGTH);
|
||||
next(it, range_end);
|
||||
return *this;
|
||||
}
|
||||
iterator operator ++ (int)
|
||||
{
|
||||
iterator temp = *this;
|
||||
next(it, it + MAX_UTF8_SEQUENCE_LENGTH);
|
||||
next(it, range_end);
|
||||
return temp;
|
||||
}
|
||||
iterator& operator -- ()
|
||||
{
|
||||
previous(it, it - MAX_UTF8_SEQUENCE_LENGTH);
|
||||
prior(it, range_start);
|
||||
return *this;
|
||||
}
|
||||
iterator operator -- (int)
|
||||
{
|
||||
iterator temp = *this;
|
||||
previous(it, it - MAX_UTF8_SEQUENCE_LENGTH);
|
||||
prior(it, range_start);
|
||||
return temp;
|
||||
}
|
||||
}; // class iterator
|
||||
|
|
|
@ -88,13 +88,20 @@ namespace utf8
|
|||
}
|
||||
|
||||
template <typename octet_iterator>
|
||||
uint32_t previous(octet_iterator& it)
|
||||
uint32_t prior(octet_iterator& it)
|
||||
{
|
||||
while (internal::is_trail(*(--it))) ;
|
||||
octet_iterator temp = it;
|
||||
return next(temp);
|
||||
}
|
||||
|
||||
// Deprecated in versions that include prior, but only for the sake of consistency (see utf8::previous)
|
||||
/// Legacy spelling of prior(): forwards to it and hands back its result.
/// `it` is updated by prior() exactly as if prior() had been called directly.
template <typename octet_iterator>
inline uint32_t previous(octet_iterator& it)
{
    const uint32_t code_point = prior(it);
    return code_point;
}
|
||||
|
||||
template <typename octet_iterator, typename distance_type>
|
||||
void advance (octet_iterator& it, distance_type n)
|
||||
{
|
||||
|
|
|
@ -22,6 +22,8 @@ int main()
|
|||
end = append(0x10346, u);
|
||||
assert (u[0] == 0xf0 && u[1] == 0x90 && u[2] == 0x8d && u[3] == 0x86 && u[4] == 0);
|
||||
|
||||
|
||||
|
||||
//next
|
||||
char* twochars = "\xe6\x97\xa5\xd1\x88";
|
||||
char* w = twochars;
|
||||
|
@ -41,8 +43,24 @@ int main()
|
|||
assert (cp == 0x0448);
|
||||
assert (w == threechars + 9);
|
||||
|
||||
//prior
|
||||
w = twochars + 3;
|
||||
cp = prior (w, twochars);
|
||||
assert (cp == 0x65e5);
|
||||
assert (w == twochars);
|
||||
|
||||
//prior (stepping backwards through threechars)
|
||||
w = threechars + 9;
|
||||
cp = prior(w, threechars);
|
||||
assert (cp == 0x0448);
|
||||
assert (w == threechars + 7);
|
||||
cp = prior(w, threechars);
|
||||
assert (cp == 0x65e5);
|
||||
assert (w == threechars + 4);
|
||||
cp = prior(w, threechars);
|
||||
assert (cp == 0x10346);
|
||||
assert (w == threechars);
|
||||
|
||||
//previous (deprecated)
|
||||
w = twochars + 3;
|
||||
cp = previous (w, twochars - 1);
|
||||
assert (cp == 0x65e5);
|
||||
|
@ -131,19 +149,19 @@ int main()
|
|||
assert (std::equal(replace_invalid_result.begin(), replace_invalid_result.end(), fixed_invalid_sequence));
|
||||
|
||||
// iterator
|
||||
utf8::iterator<char*> it(threechars);
|
||||
utf8::iterator<char*> it(threechars, threechars, threechars + 9);
|
||||
utf8::iterator<char*> it2 = it;
|
||||
assert (it2 == it);
|
||||
assert (*it == 0x10346);
|
||||
assert (*(++it) == 0x65e5);
|
||||
assert ((*it++) == 0x65e5);
|
||||
assert (*it == 0x0448);
|
||||
utf8::iterator<char*> endit (threechars + 9);
|
||||
utf8::iterator<char*> endit (threechars + 9, threechars, threechars + 9);
|
||||
assert (++it == endit);
|
||||
assert (*(--it) == 0x0448);
|
||||
assert ((*it--) == 0x0448);
|
||||
assert (*it == 0x65e5);
|
||||
assert (--it == utf8::iterator<char*>(threechars));
|
||||
assert (--it == utf8::iterator<char*>(threechars, threechars, threechars + 9));
|
||||
assert (*it == 0x10346);
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
|
@ -179,7 +197,8 @@ int main()
|
|||
assert (w == threechars + 9);
|
||||
|
||||
|
||||
//previous
|
||||
//previous (calls prior internally)
|
||||
|
||||
w = twochars + 3;
|
||||
cp = unchecked::previous (w);
|
||||
assert (cp == 0x65e5);
|
||||
|
|
Loading…
Reference in a new issue