From d7ef2ce0a92251abee067fbf222113c8cf5df492 Mon Sep 17 00:00:00 2001 From: ntrifunovic Date: Tue, 15 Aug 2006 01:54:49 +0000 Subject: [PATCH] Changed conversion functions to return the position of last inserted character/byte git-svn-id: http://svn.code.sf.net/p/utfcpp/code@44 a809a056-fc17-0410-9590-b4f493f8b08e --- source/utf8.h | 32 ++++++++++++++++++++++---------- test_drivers/smoke_test/test.cpp | 29 ++++++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 13 deletions(-) diff --git a/source/utf8.h b/source/utf8.h index 9b5b9c4..a775ef0 100644 --- a/source/utf8.h +++ b/source/utf8.h @@ -339,7 +339,7 @@ namespace internal } template - void utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result) + octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result) { while (start != end) { uint32_t cp = internal::mask16(*start++); @@ -357,11 +357,12 @@ namespace internal } result = append(cp, result); - } + } + return result; } template - void utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result) + u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result) { while (start != end) { uint32_t cp = next(start, end); @@ -372,20 +373,25 @@ namespace internal else *result++ = static_cast(cp); } + return result; } template - void utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result) + octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result) { while (start != end) result = append(*(start++), result); + + return result; } template - void utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result) + u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result) { while (start < end) (*result++) = next(start, end); + + return result; } namespace unchecked @@ -472,7 +478,7 @@ namespace internal } template - void utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result) + octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result) { while (start != end) { uint32_t cp = internal::mask16(*start++); @@ -482,11 +488,12 @@ namespace internal cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; } result = append(cp, result); - } + } + return result; } template - void utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result) + u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result) { while (start != end) { uint32_t cp = next(start); @@ -497,20 +504,25 @@ namespace internal else *result++ = static_cast(cp); } + return result; } template - void utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result) + octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result) { while (start != end) result = append(*(start++), result); + + return result; } template - void utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result) + u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result) { while (start < end) (*result++) = next(start); + + return result; } } // namespace utf8::unchecked diff --git a/test_drivers/smoke_test/test.cpp b/test_drivers/smoke_test/test.cpp index f368c90..c769f4c 100644 --- a/test_drivers/smoke_test/test.cpp +++ b/test_drivers/smoke_test/test.cpp @@ -71,29 +71,40 @@ int main() // utf32to8 int utf32string[] = {0x448, 0x65E5, 0x10346, 0}; - vector utf8result; + vector utf8result; utf32to8(utf32string, utf32string + 3, back_inserter(utf8result)); assert (utf8result.size() == 9); + // try it with the return value; + char* utf8_end = utf32to8(utf32string, utf32string + 3, &utf8result[0]); + assert (utf8_end == &utf8result[0] + 9); //utf8to32 vector utf32result; utf8to32(twochars, twochars + 5, back_inserter(utf32result)); assert (utf32result.size() == 2); + // try it with the return value; + int* utf32_end = utf8to32(twochars, twochars + 5, &utf32result[0]); + assert (utf32_end == &utf32result[0] + 2); //utf16to8 unsigned short utf16string[] = {0x41, 0x0448, 0x65e5, 0xd834, 0xdd1e}; utf8result.clear(); utf16to8(utf16string, utf16string + 5, back_inserter(utf8result)); assert (utf8result.size() == 10); + // try it with the return value; + utf8_end = utf16to8 (utf16string, utf16string + 5, &utf8result[0]); + assert (utf8_end == &utf8result[0] + 10); //utf8to16 - unsigned char utf8_with_surrogates[] = {0xE6, 0x97, 0xA5, 0xd1, 0x88, - 0xf0, 0x9d, 0x84, 0x9e}; + char utf8_with_surrogates[] = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e"; vector utf16result; utf8to16(utf8_with_surrogates, utf8_with_surrogates + 9, back_inserter(utf16result)); assert (utf16result.size() == 4); assert (utf16result[2] == 0xd834); assert (utf16result[3] == 0xdd1e); + // try it with the return value; + unsigned short* utf16_end = utf8to16 (utf8_with_surrogates, utf8_with_surrogates + 9, &utf16result[0]); + assert (utf16_end == &utf16result[0] + 4); //find_invalid unsigned char utf_invalid[] = {0xE6, 0x97, 0xA5, 0xd1, 0x88, 0xfa}; @@ -174,16 +185,25 @@ int main() utf8result.clear(); unchecked::utf32to8(utf32string, utf32string + 3, back_inserter(utf8result)); assert (utf8result.size() == 9); + // try it with the return value; + utf8_end = utf32to8(utf32string, utf32string + 3, &utf8result[0]); + assert(utf8_end == &utf8result[0] + 9); //utf8to32 utf32result.clear(); unchecked::utf8to32(twochars, twochars + 5, back_inserter(utf32result)); assert (utf32result.size() == 2); + // try it with the return value; + utf32_end = utf8to32(twochars, twochars + 5, &utf32result[0]); + assert (utf32_end == &utf32result[0] + 2); //utf16to8 utf8result.clear(); unchecked::utf16to8(utf16string, utf16string + 5, back_inserter(utf8result)); assert (utf8result.size() == 10); + // try it with the return value; + utf8_end = utf16to8 (utf16string, utf16string + 5, &utf8result[0]); + assert (utf8_end == &utf8result[0] + 10); //utf8to16 utf16result.clear(); @@ -191,6 +211,9 @@ int main() assert (utf16result.size() == 4); assert (utf16result[2] == 0xd834); assert (utf16result[3] == 0xdd1e); + // try it with the return value; + utf16_end = utf8to16 (utf8_with_surrogates, utf8_with_surrogates + 9, &utf16result[0]); + assert (utf16_end == &utf16result[0] + 4); }