From 060e41eae12c6d2881eb5e3e0c627741ffe1370e Mon Sep 17 00:00:00 2001
From: ntrifunovic
UTF-8 code point.
Example of use:
-unsigned char twochars[] = {0xE6, 0x97, 0xA5, 0xd1, 0x88, 0x0}; -unsigned char* w = twochars; +char* twochars = "\xe6\x97\xa5\xd1\x88"; +char* w = twochars; int cp = next(w, twochars + 6); @@ -185,7 +185,7 @@ beginning of the string in the search for a UTF-8 lead octet.
point. Example of use:
-unsigned char twochars[] = {0xE6, 0x97, 0xA5, 0xd1, 0x88, 0x0}; +char* twochars = "\xe6\x97\xa5\xd1\x88"; unsigned char* w = twochars + 3; int cp = previous (w, twochars - 1); @@ -222,7 +222,7 @@ extraction of a code point, an utf8::not_enough_room
exception is thrown.
Example of use:
-unsigned char twochars[] = {0xE6, 0x97, 0xA5, 0xd1, 0x88, 0x0}; +char* twochars = "\xe6\x97\xa5\xd1\x88"; unsigned char* w = twochars; advance (w, 2, twochars + 6); @@ -248,7 +248,7 @@ length. It can be the beginning of a new code point, or not.
points. Example of use:
-unsigned char twochars[] = {0xE6, 0x97, 0xA5, 0xd1, 0x88, 0x0}; +char* twochars = "\xe6\x97\xa5\xd1\x88"; size_t dist = utf8::distance(twochars, twochars + 5); @@ -302,8 +302,7 @@ string where to append the result of conversion.
Return value: An iterator pointing to the place after the appended UTF-16 string. Example of use:
-unsigned char utf8_with_surrogates[] = {0xE6, 0x97, 0xA5, 0xd1, 0x88, - 0xf0, 0x9d, 0x84, 0x9e}; +char utf8_with_surrogates[] = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e"; vector <unsigned short> utf16result; utf8to16(utf8_with_surrogates, utf8_with_surrogates + 9, back_inserter(utf16result)); @@ -354,7 +353,7 @@ string where to append the result of conversion.
Return value: An iterator pointing to the place after the appended UTF-32 string. Example of use:
-unsigned char twochars[] = {0xE6, 0x97, 0xA5, 0xd1, 0x88, 0x0}; +char* twochars = "\xe6\x97\xa5\xd1\x88"; vector<int> utf32result; utf8to32(twochars, twochars + 5, back_inserter(utf32result)); @@ -379,9 +378,9 @@ octet in the UTF-8 string. In case none were found, equals end
. Example of use:
-unsigned char utf_invalid[] = {0xE6, 0x97, 0xA5, 0xd1, 0x88, 0xfa}; +char utf_invalid[] = "\xe6\x97\xa5\xd1\x88\xfa"; -unsigned char* invalid = find_invalid(utf_invalid, utf_invalid + 6); +char* invalid = find_invalid(utf_invalid, utf_invalid + 6); assert (invalid == utf_invalid + 5);@@ -401,7 +400,7 @@ string to test for validity.
UTF-8 string; false
if not. Example of use:-unsigned char utf_invalid[] = {0xE6, 0x97, 0xA5, 0xd1, 0x88, 0xfa}; +char utf_invalid[] = "\xe6\x97\xa5\xd1\x88\xfa"; bool bvalid = is_valid(utf_invalid, utf_invalid + 6); @@ -467,8 +466,8 @@ code point.
UTF-8 code point. Example of use:
-unsigned char twochars[] = {0xE6, 0x97, 0xA5, 0xd1, 0x88, 0x0}; -unsigned char* w = twochars; +char* twochars = "\xe6\x97\xa5\xd1\x88"; +char* w = twochars; int cp = unchecked::next(w); @@ -492,8 +491,8 @@ point to the beginning of the previous code point.
point. Example of use:
-unsigned char twochars[] = {0xE6, 0x97, 0xA5, 0xd1, 0x88, 0x0}; -unsigned char* w = twochars + 3; +char* twochars = "\xe6\x97\xa5\xd1\x88"; +char* w = twochars + 3; int cp = unchecked::previous (w); @@ -517,8 +516,8 @@ point.
we want to advance.
Example of use:
-unsigned char twochars[] = {0xE6, 0x97, 0xA5, 0xd1, 0x88, 0x0}; -unsigned char* w = twochars; +char* twochars = "\xe6\x97\xa5\xd1\x88"; +char* w = twochars; unchecked::advance (w, 2); @@ -544,7 +543,7 @@ length. It can be the beginning of a new code point, or not.
points. Example of use:
-unsigned char twochars[] = {0xE6, 0x97, 0xA5, 0xd1, 0x88, 0x0}; +char* twochars = "\xe6\x97\xa5\xd1\x88"; size_t dist = utf8::unchecked::distance(twochars, twochars + 5); @@ -593,8 +592,7 @@ string where to append the result of conversion.
Example of use:
-unsigned char utf8_with_surrogates[] = {0xE6, 0x97, 0xA5, 0xd1, 0x88, - 0xf0, 0x9d, 0x84, 0x9e}; +char utf8_with_surrogates[] = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e"; vector <unsigned short> utf16result; unchecked::utf8to16(utf8_with_surrogates, utf8_with_surrogates + 9, back_inserter(utf16result)); @@ -646,7 +644,7 @@ string where to append the result of conversion.
Example of use:
-unsigned char twochars[] = {0xE6, 0x97, 0xA5, 0xd1, 0x88, 0x0}; +char* twochars = "\xe6\x97\xa5\xd1\x88"; vector<int> utf32result; unchecked::utf8to32(twochars, twochars + 5, back_inserter(utf32result)); diff --git a/test_drivers/smoke_test/test.cpp b/test_drivers/smoke_test/test.cpp index c769f4c..a0a3696 100644 --- a/test_drivers/smoke_test/test.cpp +++ b/test_drivers/smoke_test/test.cpp @@ -107,8 +107,8 @@ int main() assert (utf16_end == &utf16result[0] + 4); //find_invalid - unsigned char utf_invalid[] = {0xE6, 0x97, 0xA5, 0xd1, 0x88, 0xfa}; - unsigned char* invalid = find_invalid(utf_invalid, utf_invalid + 6); + char utf_invalid[] = "\xe6\x97\xa5\xd1\x88\xfa"; + char* invalid = find_invalid(utf_invalid, utf_invalid + 6); assert (invalid == utf_invalid + 5); //is_valid