Added the checked iterator, function prior that replaces previous, and updated the html documentation

git-svn-id: http://svn.code.sf.net/p/utfcpp/code@66 a809a056-fc17-0410-9590-b4f493f8b08e
This commit is contained in:
ntrifunovic 2006-11-04 01:28:38 +00:00
parent d2ee7164b6
commit e4dc80dae3
5 changed files with 1549 additions and 953 deletions

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -29,7 +29,7 @@ DEALINGS IN THE SOFTWARE.
#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
#include "core.h"
#include <exception>
#include <stdexcept>
namespace utf8
{
@ -152,7 +152,18 @@ namespace utf8
return cp;
}
/// Steps `it` back to the lead octet of the UTF-8 sequence that ends just
/// before its current position and returns the code point decoded from there.
///
/// @param it     In/out. On entry, one past the last octet of the sequence to
///               decode; on success, positioned at that sequence's first octet.
/// @param start  First valid position of the range. No octet before `start`
///               is ever read.
/// @return       The value produced by next() for the octets [it, old it).
/// @throws std::out_of_range  if `it == start` on entry (nothing precedes it).
/// @throws invalid_utf8       if `start` is reached and the octet there is
///                            still a trail octet (no lead octet in range).
///                            next() may report further errors of its own.
template <typename octet_iterator>
uint32_t prior(octet_iterator& it, octet_iterator start)
{
    // Nothing to step back over: decrementing would leave the valid range.
    if (it == start)
        throw std::out_of_range("No utf-8 sequence before the iterator position");

    octet_iterator end = it;
    // Walk backwards over trail octets. The range check is made on the octet
    // that was just examined, *before* the next decrement, so the loop can
    // never dereference a position in front of `start`.
    while (internal::is_trail(*(--it)))
        if (it == start)
            throw invalid_utf8(*it); // error - no lead byte in the sequence
    // Decode on a copy so that `it` stays on the lead octet.
    octet_iterator temp = it;
    return next(temp, end);
}
/// Deprecated in versions that include "prior"
template <typename octet_iterator>
uint32_t previous(octet_iterator& it, octet_iterator pass_start)
{
@ -240,37 +251,50 @@ namespace utf8
// The iterator class
//
// Wraps an octet iterator and exposes the underlying octets as a sequence of
// code points: dereferencing decodes one code point, ++ / -- move by one
// encoded sequence. The range-aware constructor records [range_start,
// range_end] and the range-aware members pass those bounds to next()/prior().
//
// NOTE(review): this listing appears to come from a diff view whose +/-
// markers were stripped, so several members show two variants back to back
// (one bounded by MAX_UTF8_SEQUENCE_LENGTH / previous(), one bounded by
// range_start / range_end / prior()), and operator== and the constructor
// appear twice. Presumably the MAX_UTF8_SEQUENCE_LENGTH forms, the
// one-argument constructor and the one-line operator== are the pre-change
// text - confirm against the repository before treating this as compilable.
template <typename octet_iterator>
class iterator {
    // Upper bound, in octets, used by the fixed-window variants below.
    static const typename std::iterator_traits<octet_iterator>::difference_type MAX_UTF8_SEQUENCE_LENGTH = 4;
    octet_iterator it;          // current position
    octet_iterator range_start; // lower bound handed to prior()
    octet_iterator range_end;   // upper bound handed to next()
    public:
    // Positions the iterator at octet_it; range_start / range_end are not
    // initialised by this overload.
    explicit iterator (const octet_iterator& octet_it) : it(octet_it) {}
    // Positions the iterator at octet_it inside [range_start, range_end].
    // Throws std::out_of_range when octet_it lies outside that range.
    explicit iterator (const octet_iterator& octet_it,
                       const octet_iterator& range_start,
                       const octet_iterator& range_end) :
        it(octet_it), range_start(range_start), range_end(range_end)
    {
        if (it < range_start || it > range_end)
            throw std::out_of_range("Invalid utf-8 iterator position");
    }
    // the default "big three" are OK
    // Returns the code point decoded at the current position. Decoding is
    // done on a copy, so the iterator itself does not move.
    uint32_t operator * () const
    {
        octet_iterator temp = it;
        return next(temp, temp + MAX_UTF8_SEQUENCE_LENGTH);
        return next(temp, range_end);
    }
    // Two iterators are comparable only when they were created over the same
    // range, i.e. when BOTH bounds match. A mismatch in either bound is
    // therefore reported with std::logic_error (the test must be '||': with
    // '&&' a pair sharing just one bound would be compared silently).
    bool operator == (const iterator& rhs) const
    {
        if (range_start != rhs.range_start || range_end != rhs.range_end)
            throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
        return (it == rhs.it);
    }
    // Position-only comparison (no range check).
    bool operator == (const iterator& rhs) const { return (it == rhs.it); }
    // Pre-increment: advances via next() and returns *this.
    iterator& operator ++ ()
    {
        next(it, it + MAX_UTF8_SEQUENCE_LENGTH);
        next(it, range_end);
        return *this;
    }
    // Post-increment: advances via next() and returns a copy taken before
    // the move.
    iterator operator ++ (int)
    {
        iterator temp = *this;
        next(it, it + MAX_UTF8_SEQUENCE_LENGTH);
        next(it, range_end);
        return temp;
    }
    // Pre-decrement: steps back via previous()/prior() and returns *this.
    iterator& operator -- ()
    {
        previous(it, it - MAX_UTF8_SEQUENCE_LENGTH);
        prior(it, range_start);
        return *this;
    }
    // Post-decrement: steps back via previous()/prior() and returns a copy
    // taken before the move.
    iterator operator -- (int)
    {
        iterator temp = *this;
        previous(it, it - MAX_UTF8_SEQUENCE_LENGTH);
        prior(it, range_start);
        return temp;
    }
}; // class iterator

View file

@ -88,13 +88,20 @@ namespace utf8
}
// Moves `it` backwards until it rests on an octet for which
// internal::is_trail() is false, then returns the value next() produces when
// decoding from that position. `it` is left on that octet; decoding uses a
// copy. This overload takes no range bound, so nothing stops the backward
// scan other than finding a non-trail octet - the caller must guarantee one
// exists before `it`.
// NOTE(review): two signature lines follow (previous / prior). This looks like
// the old and new line of a rename shown by a diff view without +/- markers;
// presumably only the `prior` line belongs to the current file - confirm.
template <typename octet_iterator>
uint32_t previous(octet_iterator& it)
uint32_t prior(octet_iterator& it)
{
// Empty loop body: the decrement inside the condition does all the work.
while (internal::is_trail(*(--it))) ;
octet_iterator temp = it;
return next(temp);
}
// Old name for prior(): forwards `it` to prior() and returns its result
// unchanged. Deprecated in versions that include prior, but only for the sake
// of consistency (see utf8::previous).
template <typename octet_iterator>
inline uint32_t previous(octet_iterator& it)
{
    const uint32_t code_point = prior(it);
    return code_point;
}
template <typename octet_iterator, typename distance_type>
void advance (octet_iterator& it, distance_type n)
{

View file

@ -22,6 +22,8 @@ int main()
end = append(0x10346, u);
assert (u[0] == 0xf0 && u[1] == 0x90 && u[2] == 0x8d && u[3] == 0x86 && u[4] == 0);
//next
char* twochars = "\xe6\x97\xa5\xd1\x88";
char* w = twochars;
@ -41,8 +43,24 @@ int main()
assert (cp == 0x0448);
assert (w == threechars + 9);
//prior
w = twochars + 3;
cp = prior (w, twochars);
assert (cp == 0x65e5);
assert (w == twochars);
//previous
w = threechars + 9;
cp = prior(w, threechars);
assert (cp == 0x0448);
assert (w == threechars + 7);
cp = prior(w, threechars);
assert (cp == 0x65e5);
assert (w == threechars + 4);
cp = prior(w, threechars);
assert (cp == 0x10346);
assert (w == threechars);
//previous (deprecated)
w = twochars + 3;
cp = previous (w, twochars - 1);
assert (cp == 0x65e5);
@ -131,19 +149,19 @@ int main()
assert (std::equal(replace_invalid_result.begin(), replace_invalid_result.end(), fixed_invalid_sequence));
// iterator
utf8::iterator<char*> it(threechars);
utf8::iterator<char*> it(threechars, threechars, threechars + 9);
utf8::iterator<char*> it2 = it;
assert (it2 == it);
assert (*it == 0x10346);
assert (*(++it) == 0x65e5);
assert ((*it++) == 0x65e5);
assert (*it == 0x0448);
utf8::iterator<char*> endit (threechars + 9);
utf8::iterator<char*> endit (threechars + 9, threechars, threechars + 9);
assert (++it == endit);
assert (*(--it) == 0x0448);
assert ((*it--) == 0x0448);
assert (*it == 0x65e5);
assert (--it == utf8::iterator<char*>(threechars));
assert (--it == utf8::iterator<char*>(threechars, threechars, threechars + 9));
assert (*it == 0x10346);
//////////////////////////////////////////////////////////
@ -179,7 +197,8 @@ int main()
assert (w == threechars + 9);
//previous
//previous (calls prior internally)
w = twochars + 3;
cp = unchecked::previous (w);
assert (cp == 0x65e5);