Changed conversion functions to return an iterator to the position just past the last inserted character/byte

git-svn-id: http://svn.code.sf.net/p/utfcpp/code@44 a809a056-fc17-0410-9590-b4f493f8b08e
This commit is contained in:
ntrifunovic 2006-08-15 01:54:49 +00:00
parent 5658c96996
commit d7ef2ce0a9
2 changed files with 48 additions and 13 deletions

View file

@ -339,7 +339,7 @@ namespace internal
}
template <typename u16bit_iterator, typename octet_iterator>
void utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
{
while (start != end) {
uint32_t cp = internal::mask16(*start++);
@ -357,11 +357,12 @@ namespace internal
}
result = append(cp, result);
}
}
return result;
}
template <typename u16bit_iterator, typename octet_iterator>
void utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
{
while (start != end) {
uint32_t cp = next(start, end);
@ -372,20 +373,25 @@ namespace internal
else
*result++ = static_cast<uint16_t>(cp);
}
return result;
}
// Converts the UTF-32 code points in [start, end) to UTF-8, writing the
// octets through `result`.
//
// start, end - range of 32-bit code points to convert.
// result     - output iterator that receives the encoded octets.
//
// Returns the output iterator as it stands after the last append() call
// (the unchanged `result` when the input range is empty), so a caller
// writing into a raw buffer can tell where the encoded data ends.
//
// Note: octet_iterator is the first template parameter but the type of the
// third function argument; both types are deduced from the call.
template <typename octet_iterator, typename u32bit_iterator>
void utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
{
while (start != end)
// append() hands back the advanced output position; keep it for the next code point.
result = append(*(start++), result);
// Output position after the final write.
return result;
}
// Converts the UTF-8 octets in [start, end) to 32-bit code points, writing
// one value through `result` per call to next().
//
// start, end - range of octets to decode.
// result     - output iterator that receives the decoded code points.
//
// Returns `result` after the last write (unchanged when the loop body never
// runs).
//
// NOTE(review): next(start, end) is defined outside this excerpt; presumably
// it decodes one code point and advances `start` past it - confirm there.
// NOTE(review): the loop condition uses `<` where utf32to8 uses `!=`, so
// octet_iterator must support operator< here.
template <typename octet_iterator, typename u32bit_iterator>
void utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
{
while (start < end)
// Store the value returned by next() and step the output iterator.
(*result++) = next(start, end);
// Output position after the final write.
return result;
}
namespace unchecked
@ -472,7 +478,7 @@ namespace internal
}
template <typename u16bit_iterator, typename octet_iterator>
void utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
{
while (start != end) {
uint32_t cp = internal::mask16(*start++);
@ -482,11 +488,12 @@ namespace internal
cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
}
result = append(cp, result);
}
}
return result;
}
template <typename u16bit_iterator, typename octet_iterator>
void utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
{
while (start != end) {
uint32_t cp = next(start);
@ -497,20 +504,25 @@ namespace internal
else
*result++ = static_cast<uint16_t>(cp);
}
return result;
}
// Variant of utf32to8 inside namespace unchecked: converts the 32-bit code
// points in [start, end) to UTF-8, writing the octets through `result`.
//
// start, end - range of 32-bit code points to convert.
// result     - output iterator that receives the encoded octets.
//
// Returns the output iterator as it stands after the last append() call
// (the unchanged `result` when the input range is empty).
//
// NOTE(review): the append() used here is resolved from this namespace's
// scope and is not shown in this excerpt; presumably it performs no
// validation of the code point - confirm against its definition.
template <typename octet_iterator, typename u32bit_iterator>
void utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
{
while (start != end)
// append() hands back the advanced output position; keep it for the next code point.
result = append(*(start++), result);
// Output position after the final write.
return result;
}
// Variant of utf8to32 inside namespace unchecked: converts the octets in
// [start, end) to 32-bit code points, writing one value through `result`
// per call to next().
//
// start, end - range of octets to decode.
// result     - output iterator that receives the decoded code points.
//
// Returns `result` after the last write (unchanged when the loop body never
// runs).
//
// NOTE(review): next(start) is given no end iterator, so nothing visible
// here bounds how far a single call may advance `start`; presumably the
// caller must supply well-formed input - confirm against next().
// NOTE(review): the loop condition is `<` rather than `!=`, which requires
// operator< on octet_iterator; it would also stop the loop if `start` were
// moved beyond `end` - confirm whether that is the intent.
template <typename octet_iterator, typename u32bit_iterator>
void utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
{
while (start < end)
// Store the value returned by next() and step the output iterator.
(*result++) = next(start);
// Output position after the final write.
return result;
}
} // namespace utf8::unchecked

View file

@ -71,29 +71,40 @@ int main()
// utf32to8
int utf32string[] = {0x448, 0x65E5, 0x10346, 0};
vector<unsigned char> utf8result;
vector<char> utf8result;
utf32to8(utf32string, utf32string + 3, back_inserter(utf8result));
assert (utf8result.size() == 9);
// try it with the return value;
char* utf8_end = utf32to8(utf32string, utf32string + 3, &utf8result[0]);
assert (utf8_end == &utf8result[0] + 9);
//utf8to32
vector<int> utf32result;
utf8to32(twochars, twochars + 5, back_inserter(utf32result));
assert (utf32result.size() == 2);
// try it with the return value;
int* utf32_end = utf8to32(twochars, twochars + 5, &utf32result[0]);
assert (utf32_end == &utf32result[0] + 2);
//utf16to8
unsigned short utf16string[] = {0x41, 0x0448, 0x65e5, 0xd834, 0xdd1e};
utf8result.clear();
utf16to8(utf16string, utf16string + 5, back_inserter(utf8result));
assert (utf8result.size() == 10);
// try it with the return value;
utf8_end = utf16to8 (utf16string, utf16string + 5, &utf8result[0]);
assert (utf8_end == &utf8result[0] + 10);
//utf8to16
unsigned char utf8_with_surrogates[] = {0xE6, 0x97, 0xA5, 0xd1, 0x88,
0xf0, 0x9d, 0x84, 0x9e};
char utf8_with_surrogates[] = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e";
vector <unsigned short> utf16result;
utf8to16(utf8_with_surrogates, utf8_with_surrogates + 9, back_inserter(utf16result));
assert (utf16result.size() == 4);
assert (utf16result[2] == 0xd834);
assert (utf16result[3] == 0xdd1e);
// try it with the return value;
unsigned short* utf16_end = utf8to16 (utf8_with_surrogates, utf8_with_surrogates + 9, &utf16result[0]);
assert (utf16_end == &utf16result[0] + 4);
//find_invalid
unsigned char utf_invalid[] = {0xE6, 0x97, 0xA5, 0xd1, 0x88, 0xfa};
@ -174,16 +185,25 @@ int main()
utf8result.clear();
unchecked::utf32to8(utf32string, utf32string + 3, back_inserter(utf8result));
assert (utf8result.size() == 9);
// try it with the return value;
utf8_end = utf32to8(utf32string, utf32string + 3, &utf8result[0]);
assert(utf8_end == &utf8result[0] + 9);
//utf8to32
utf32result.clear();
unchecked::utf8to32(twochars, twochars + 5, back_inserter(utf32result));
assert (utf32result.size() == 2);
// try it with the return value;
utf32_end = utf8to32(twochars, twochars + 5, &utf32result[0]);
assert (utf32_end == &utf32result[0] + 2);
//utf16to8
utf8result.clear();
unchecked::utf16to8(utf16string, utf16string + 5, back_inserter(utf8result));
assert (utf8result.size() == 10);
// try it with the return value;
utf8_end = utf16to8 (utf16string, utf16string + 5, &utf8result[0]);
assert (utf8_end == &utf8result[0] + 10);
//utf8to16
utf16result.clear();
@ -191,6 +211,9 @@ int main()
assert (utf16result.size() == 4);
assert (utf16result[2] == 0xd834);
assert (utf16result[3] == 0xdd1e);
// try it with the return value;
utf16_end = utf8to16 (utf8_with_surrogates, utf8_with_surrogates + 9, &utf16result[0]);
assert (utf16_end == &utf16result[0] + 4);
}