diff --git a/doc/utf8cpp.html b/doc/utf8cpp.html index c915572..1228c49 100644 --- a/doc/utf8cpp.html +++ b/doc/utf8cpp.html @@ -6,50 +6,65 @@ +
@@ -164,25 +189,35 @@ Encodes a 32 bit code point as a UTF-8 sequence of octets and appends the sequence to a UTF-8 string.
-
-template <typename octet_iterator>
+
+template <typename octet_iterator>
octet_iterator append(uint32_t cp, octet_iterator result);
-
+
+
cp
: A 32 bit integer representing a code point to append to the
sequence.
- result
: An output iterator to the place in the sequence where to
+ result
: An output iterator to the place in the sequence where to
append the code point.
- Return value: An iterator pointing to the place after the newly appended
- sequence.
+ Return value: An iterator pointing to the place
+ after the newly appended sequence.
Example of use:
-unsigned char u[5] = {0,0,0,0,0}; -unsigned char* end = append(0x0448, u); -assert (u[0] == 0xd1 && u[1] == 0x88 && u[2] == 0 && u[3] == 0 && u[4] == 0); +unsigned char u[5] = {0,0,0,0,0}; +unsigned char* end = append(0x0448, u); +assert (u[0] == 0xd1 && u[1] == 0x88 && u[2] == 0 && u[3] == 0 && u[4] == 0);
Note that append
does not allocate any memory - it is the burden of
@@ -202,26 +237,32 @@ assert (u[0] == 0xd1 && u[1] == 0x88 && u[2] == 0 && u[3
Given the iterator to the beginning of the UTF-8 sequence, it returns the code
point and moves the iterator to the next position.
template <typename octet_iterator> uint32_t next(octet_iterator& it,
- octet_iterator end);
++template <typename octet_iterator> +uint32_t next(octet_iterator& it, octet_iterator end); + +
it
: a reference to an iterator pointing to the beginning of an UTF-8
encoded code point. After the function returns, it is incremented to point to the
beginning of the next code point.
- end
: end of the UTF-8 sequence to be processed. If it
+ end
: end of the UTF-8 sequence to be processed. If it
gets equal to end
during the extraction of a code point, an
utf8::not_enough_room
exception is thrown.
- Return value: the 32 bit representation of the processed UTF-8 code point.
+ Return value: the 32 bit representation of the
+ processed UTF-8 code point.
Example of use:
-char* twochars = "\xe6\x97\xa5\xd1\x88"; -char* w = twochars; -int cp = next(w, twochars + 6); -assert (cp == 0x65e5); -assert (w == twochars + 3); +char* twochars = "\xe6\x97\xa5\xd1\x88"; +char* w = twochars; +int cp = next(w, twochars + 6); +assert (cp == 0x65e5); +assert (w == twochars + 3);
This function is typically used to iterate through a UTF-8 encoded string. @@ -238,26 +279,34 @@ assert (w == twochars + 3); decreases the iterator until it hits the beginning of the previous UTF-8 encoded code point and returns the 32 bits representation of the code point.
-template <typename octet_iterator> uint32_t previous(octet_iterator&
- it, octet_iterator pass_start);
++template <typename octet_iterator> +uint32_t previous(octet_iterator& it, octet_iterator pass_start); + +
it
: a reference pointing to an octet within a UTF-8 encoded string.
After the function returns, it is decremented to point to the beginning of the
previous code point.
- pass_start
: an iterator to the point in the sequence where the search
+ pass_start
: an iterator to the point in the sequence where the search
for the beginning of a code point is aborted if no result was reached. It is a
safety measure to prevent passing the beginning of the string in the search for a
UTF-8 lead octet.
- Return value: the 32 bit representation of the previous code point.
+ Return value: the 32 bit representation of the
+ previous code point.
Example of use:
-char* twochars = "\xe6\x97\xa5\xd1\x88"; -unsigned char* w = twochars + 3; -int cp = previous (w, twochars - 1); -assert (cp == 0x65e5); +char* twochars = "\xe6\x97\xa5\xd1\x88"; +unsigned char* w = twochars + 3; +int cp = previous (w, twochars - 1); +assert (cp == 0x65e5); assert (w == twochars);
@@ -280,15 +329,20 @@ assert (w == twochars); Advances an iterator by the specified number of code points within an UTF-8 sequence.
-template <typename octet_iterator, typename distance_type> void advance
- (octet_iterator& it, distance_type n, octet_iterator end);
++template <typename octet_iterator, typename distance_type> +void advance (octet_iterator& it, distance_type n, octet_iterator end); + +
it
: a reference to an iterator pointing to the beginning of an UTF-8
encoded code point. After the function returns, it is incremented to point to the
nth following code point.
- n
: a positive integer that shows how many code points we want to
+ n
: a positive integer that shows how many code points we want to
advance.
- end
: end of the UTF-8 sequence to be processed. If it
+ end
: end of the UTF-8 sequence to be processed. If it
gets equal to end
during the extraction of a code point, an
utf8::not_enough_room
exception is thrown.
-char* twochars = "\xe6\x97\xa5\xd1\x88"; -unsigned char* w = twochars; -advance (w, 2, twochars + 6); -assert (w == twochars + 5); +char* twochars = "\xe6\x97\xa5\xd1\x88"; +unsigned char* w = twochars; +advance (w, 2, twochars + 6); +assert (w == twochars + 5);
This function works only "forward". In case of a negative n
, there is
@@ -316,23 +371,29 @@ assert (w == twochars + 5);
Given the iterators to two UTF-8 encoded code points in a sequence, returns the
number of code points between them.
template <typename octet_iterator> typename
- std::iterator_traits<octet_iterator>::difference_type distance (octet_iterator
- first, octet_iterator last);
++template <typename octet_iterator> +typename std::iterator_traits<octet_iterator>::difference_type distance (octet_iterator first, octet_iterator last); + +
first
: an iterator to a beginning of a UTF-8 encoded code point.
- last
: an iterator to a "post-end" of the last UTF-8 encoded code point
- in the sequence we are trying to determine the length. It can be the beginning of a
- new code point, or not.
- Return value the distance between the iterators, in code points.
+ last
: an iterator to a "post-end" of the last UTF-8 encoded code
+ point in the sequence we are trying to determine the length. It can be the
+ beginning of a new code point, or not.
+ Return value: the distance between the iterators,
+ in code points.
Example of use:
-char* twochars = "\xe6\x97\xa5\xd1\x88"; -size_t dist = utf8::distance(twochars, twochars + 5); -assert (dist == 2); +char* twochars = "\xe6\x97\xa5\xd1\x88"; +size_t dist = utf8::distance(twochars, twochars + 5); +assert (dist == 2);
This function is used to find the length (in code points) of a UTF-8 encoded @@ -352,27 +413,35 @@ assert (dist == 2);
Converts a UTF-16 encoded string to UTF-8.
-template <typename u16bit_iterator, typename octet_iterator>
- octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator
- result);
++template <typename u16bit_iterator, typename octet_iterator> +octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result); + +
start
: an iterator pointing to the beginning of the UTF-16 encoded
string to convert.
- end
: an iterator pointing to pass-the-end of the UTF-16 encoded string
- to convert.
- result
: an output iterator to the place in the UTF-8 string where to
+ end
: an iterator pointing to past-the-end of the UTF-16 encoded
+ string to convert.
+ result
: an output iterator to the place in the UTF-8 string where to
append the result of conversion.
- Return value: An iterator pointing to the place after the appended UTF-8
- string.
+ Return value: An iterator pointing to the place
+ after the appended UTF-8 string.
Example of use:
-unsigned short utf16string[] = {0x41, 0x0448, 0x65e5, 0xd834, 0xdd1e}; -vector<unsigned char> utf8result; -utf16to8(utf16string, utf16string + 5, back_inserter(utf8result)); -assert (utf8result.size() == 10); +unsigned short utf16string[] = {0x41, 0x0448, 0x65e5, 0xd834, 0xdd1e}; +vector<unsigned char> utf8result; +utf16to8(utf16string, utf16string + 5, back_inserter(utf8result)); +assert (utf8result.size() == 10);
In case of invalid UTF-16 sequence, a utf8::invalid_utf16
exception is
@@ -384,28 +453,35 @@ assert (utf8result.size() == 10);
Converts an UTF-8 encoded string to UTF-16
-template <typename u16bit_iterator, typename octet_iterator>
- u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator
- result);
++template <typename u16bit_iterator, typename octet_iterator> +u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result); + +
start
: an iterator pointing to the beginning of the UTF-8 encoded
string to convert. <br /> end
: an iterator pointing to
past-the-end of the UTF-8 encoded string to convert.
- result
: an output iterator to the place in the UTF-16 string where to
+ result
: an output iterator to the place in the UTF-16 string where to
append the result of conversion.
- Return value: An iterator pointing to the place after the appended UTF-16
- string.
+ Return value: An iterator pointing to the place
+ after the appended UTF-16 string.
Example of use:
-char utf8_with_surrogates[] = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e"; -vector <unsigned short> utf16result; -utf8to16(utf8_with_surrogates, utf8_with_surrogates + 9, back_inserter(utf16result)); -assert (utf16result.size() == 4); -assert (utf16result[2] == 0xd834); -assert (utf16result[3] == 0xdd1e); +char utf8_with_surrogates[] = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e"; +vector <unsigned short> utf16result; +utf8to16(utf8_with_surrogates, utf8_with_surrogates + 9, back_inserter(utf16result)); +assert (utf16result.size() == 4); +assert (utf16result[2] == 0xd834); +assert (utf16result[3] == 0xdd1e);
In case of an invalid UTF-8 sequence, a utf8::invalid_utf8
exception is
@@ -418,27 +494,33 @@ assert (utf16result[3] == 0xdd1e);
Converts a UTF-32 encoded string to UTF-8.
-template <typename octet_iterator, typename u32bit_iterator>
- octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator
- result);
++template <typename octet_iterator, typename u32bit_iterator> +octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result); + +
start
: an iterator pointing to the beginning of the UTF-32 encoded
string to convert.
- end
: an iterator pointing to pass-the-end of the UTF-32 encoded string
- to convert.
- result
: an output iterator to the place in the UTF-8 string where to
+ end
: an iterator pointing to past-the-end of the UTF-32 encoded
+ string to convert.
+ result
: an output iterator to the place in the UTF-8 string where to
append the result of conversion.
- Return value: An iterator pointing to the place after the appended UTF-8
- string.
+ Return value: An iterator pointing to the place
+ after the appended UTF-8 string.
Example of use:
-int utf32string[] = {0x448, 0x65E5, 0x10346, 0}; -vector<unsigned char> utf8result; -utf32to8(utf32string, utf32string + 3, back_inserter(utf8result)); -assert (utf8result.size() == 9); +int utf32string[] = {0x448, 0x65E5, 0x10346, 0}; +vector<unsigned char> utf8result; +utf32to8(utf32string, utf32string + 3, back_inserter(utf8result)); +assert (utf8result.size() == 9);
In case of invalid UTF-32 string, a utf8::invalid_code_point
exception
@@ -450,27 +532,33 @@ assert (utf8result.size() == 9);
Converts a UTF-8 encoded string to UTF-32.
-template <typename octet_iterator, typename u32bit_iterator>
- u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator
- result);
++template <typename octet_iterator, typename u32bit_iterator> +u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result); + +
start
: an iterator pointing to the beginning of the UTF-8 encoded
string to convert.
- end
: an iterator pointing to pass-the-end of the UTF-8 encoded string
+ end
: an iterator pointing to past-the-end of the UTF-8 encoded string
to convert.
- result
: an output iterator to the place in the UTF-32 string where to
+ result
: an output iterator to the place in the UTF-32 string where to
append the result of conversion.
- Return value: An iterator pointing to the place after the appended UTF-32
- string.
+ Return value: An iterator pointing to the place
+ after the appended UTF-32 string.
Example of use:
-char* twochars = "\xe6\x97\xa5\xd1\x88"; -vector<int> utf32result; -utf8to32(twochars, twochars + 5, back_inserter(utf32result)); -assert (utf32result.size() == 2); +char* twochars = "\xe6\x97\xa5\xd1\x88"; +vector<int> utf32result; +utf8to32(twochars, twochars + 5, back_inserter(utf32result)); +assert (utf32result.size() == 2);
In case of an invalid UTF-8 sequence, a utf8::invalid_utf8
exception is
@@ -483,23 +571,30 @@ assert (utf32result.size() == 2);
Detects an invalid sequence within a UTF-8 string.
-template <typename octet_iterator> octet_iterator
- find_invalid(octet_iterator start, octet_iterator end);
++template <typename octet_iterator> +octet_iterator find_invalid(octet_iterator start, octet_iterator end); +
start
: an iterator pointing to the beginning of the UTF-8 string to
test for validity.
- end
: an iterator pointing to pass-the-end of the UTF-8 string to test
+ end
: an iterator pointing to past-the-end of the UTF-8 string to test
for validity.
- Return value: an iterator pointing to the first invalid octet in the UTF-8
- string. In case none were found, equals end
.
+ Return value: an iterator pointing to the first
+ invalid octet in the UTF-8 string. In case none were found, equals
+ end
.
Example of use:
-char utf_invalid[] = "\xe6\x97\xa5\xd1\x88\xfa"; -char* invalid = find_invalid(utf_invalid, utf_invalid + 6); -assert (invalid == utf_invalid + 5); +char utf_invalid[] = "\xe6\x97\xa5\xd1\x88\xfa"; +char* invalid = find_invalid(utf_invalid, utf_invalid + 6); +assert (invalid == utf_invalid + 5);
This function is typically used to make sure a UTF-8 string is valid before @@ -512,20 +607,26 @@ assert (invalid == utf_invalid + 5);
Checks whether a sequence of octets is a valid UTF-8 string.
-template <typename octet_iterator> bool is_valid(octet_iterator start,
- octet_iterator end);
++template <typename octet_iterator> +bool is_valid(octet_iterator start, octet_iterator end); + +
start
: an iterator pointing to the beginning of the UTF-8 string to
test for validity.
- end
: an iterator pointing to pass-the-end of the UTF-8 string to test
+ end
: an iterator pointing to past-the-end of the UTF-8 string to test
for validity.
- Return value: true
if the sequence is a valid UTF-8 string;
- false
if not.
+ Return value: true
if the sequence
+ is a valid UTF-8 string; false
if not.
-char utf_invalid[] = "\xe6\x97\xa5\xd1\x88\xfa"; -bool bvalid = is_valid(utf_invalid, utf_invalid + 6); +char utf_invalid[] = "\xe6\x97\xa5\xd1\x88\xfa"; +bool bvalid = is_valid(utf_invalid, utf_invalid + 6); assert (bvalid == false);
@@ -539,38 +640,42 @@ assert (bvalid == false);
Replaces all invalid UTF-8 sequences within a string with a replacement marker.
-
- template <typename octet_iterator, typename output_iterator>
- output_iterator replace_invalid(octet_iterator start, octet_iterator end,
- output_iterator out, uint32_t replacement);
-
- template <typename octet_iterator, typename output_iterator>
- output_iterator replace_invalid(octet_iterator start, octet_iterator end,
- output_iterator out);
-
+template <typename octet_iterator, typename output_iterator> +output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement); +template <typename octet_iterator, typename output_iterator> +output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out); + +
start
: an iterator pointing to the beginning of the UTF-8 string to
look for invalid UTF-8 sequences.
- end
: an iterator pointing to pass-the-end of the UTF-8 string to look
+ end
: an iterator pointing to past-the-end of the UTF-8 string to look
for invalid UTF-8 sequences.
- out
: An output iterator to the range where the result of replacement
+ out
: An output iterator to the range where the result of replacement
is stored.
- replacement
: A Unicode code point for the replacement marker. The
+ replacement
: A Unicode code point for the replacement marker. The
version without this parameter assumes the value 0xfffd
- Return value: An iterator pointing to the place after the UTF-8 string with
- replaced invalid sequences.
+ Return value: An iterator pointing to the place
+ after the UTF-8 string with replaced invalid sequences.
Example of use:
-char invalid_sequence[] = "a\x80\xe0\xa0\xc0\xaf\xed\xa0\x80z"; -vector<char> replace_invalid_result; -replace_invalid (invalid_sequence, invalid_sequence + sizeof(invalid_sequence), back_inserter(replace_invalid_result), '?'); +char invalid_sequence[] = "a\x80\xe0\xa0\xc0\xaf\xed\xa0\x80z"; +vector<char> replace_invalid_result; +replace_invalid (invalid_sequence, invalid_sequence + sizeof(invalid_sequence), back_inserter(replace_invalid_result), '?'); bvalid = is_valid(replace_invalid_result.begin(), replace_invalid_result.end()); assert (bvalid); -char* fixed_invalid_sequence = "a????z"; +char* fixed_invalid_sequence = "a????z"; assert (std::equal(replace_invalid_result.begin(), replace_invalid_result.end(), fixed_invalid_sequence));
@@ -589,20 +694,25 @@ assert (std::equal(replace_invalid_result.begin(), replace_invalid_result.end(),
Checks whether a sequence of three octets is a UTF-8 byte order mark (BOM)
-template <typename octet_iterator> bool is_bom (octet_iterator
- it);
++template <typename octet_iterator> +bool is_bom (octet_iterator it); +
it
: beginning of the 3-octet sequence to check
- Return value: true
if the sequence is UTF-8 byte order mark;
- false
if not.
+ Return value: true
if the sequence
+ is UTF-8 byte order mark; false
if not.
Example of use:
-unsigned char byte_order_mark[] = {0xef, 0xbb, 0xbf}; -bool bbom = is_bom(byte_order_mark); -assert (bbom == true); +unsigned char byte_order_mark[] = {0xef, 0xbb, 0xbf}; +bool bbom = is_bom(byte_order_mark); +assert (bbom == true);
The typical use of this function is to check the first three bytes of a file. If @@ -619,23 +729,35 @@ assert (bbom == true); Encodes a 32 bit code point as a UTF-8 sequence of octets and appends the sequence to a UTF-8 string.
-template <typename octet_iterator> octet_iterator append(uint32_t cp,
- octet_iterator result);
++template <typename octet_iterator> +octet_iterator append(uint32_t cp, octet_iterator result); + +
cp
: A 32 bit integer representing a code point to append to the
sequence.
- result
: An output iterator to the place in the sequence where to
+ result
: An output iterator to the place in the sequence where to
append the code point.
- Return value: An iterator pointing to the place after the newly appended
- sequence.
+ Return value: An iterator pointing to the place
+ after the newly appended sequence.
Example of use:
-unsigned char u[5] = {0,0,0,0,0}; -unsigned char* end = unchecked::append(0x0448, u); -assert (u[0] == 0xd1 && u[1] == 0x88 && u[2] == 0 && u[3] == 0 && u[4] == 0); +unsigned char u[5] = {0,0,0,0,0}; +unsigned char* end = unchecked::append(0x0448, u); +assert (u[0] == 0xd1 && u[1] == 0x88 && u[2] == 0 && u[3] == 0 && u[4] == 0);
This is a quicker but less safe version of utf8::append
. It does not
@@ -649,23 +771,29 @@ assert (u[0] == 0xd1 && u[1] == 0x88 && u[2] == 0 && u[3
Given the iterator to the beginning of a UTF-8 sequence, it returns the code point
and moves the iterator to the next position.
template <typename octet_iterator> uint32_t next(octet_iterator&
- it);
++template <typename octet_iterator> +uint32_t next(octet_iterator& it); + +
it
: a reference to an iterator pointing to the beginning of an UTF-8
encoded code point. After the function returns, it is incremented to point to the
beginning of the next code point.
- Return value: the 32 bit representation of the processed UTF-8 code point.
+ Return value: the 32 bit representation of the
+ processed UTF-8 code point.
Example of use:
-char* twochars = "\xe6\x97\xa5\xd1\x88"; -char* w = twochars; -int cp = unchecked::next(w); -assert (cp == 0x65e5); -assert (w == twochars + 3); +char* twochars = "\xe6\x97\xa5\xd1\x88"; +char* w = twochars; +int cp = unchecked::next(w); +assert (cp == 0x65e5); +assert (w == twochars + 3);
This is a quicker but less safe version of utf8::next
. It does not
@@ -679,22 +807,28 @@ assert (w == twochars + 3);
decreases the iterator until it hits the beginning of the previous UTF-8 encoded
code point and returns the 32 bit representation of the code point.
template <typename octet_iterator> uint32_t previous(octet_iterator&
- it);
++template <typename octet_iterator> +uint32_t previous(octet_iterator& it); + +
it
: a reference pointing to an octet within a UTF-8 encoded string.
After the function returns, it is decremented to point to the beginning of the
previous code point.
- Return value: the 32 bit representation of the previous code point.
+ Return value: the 32 bit representation of the
+ previous code point.
Example of use:
-char* twochars = "\xe6\x97\xa5\xd1\x88"; -char* w = twochars + 3; -int cp = unchecked::previous (w); -assert (cp == 0x65e5); +char* twochars = "\xe6\x97\xa5\xd1\x88"; +char* w = twochars + 3; +int cp = unchecked::previous (w); +assert (cp == 0x65e5); assert (w == twochars);
@@ -708,23 +842,28 @@ assert (w == twochars); Advances an iterator by the specified number of code points within an UTF-8 sequence.
-template <typename octet_iterator, typename distance_type> void advance
- (octet_iterator& it, distance_type n);
++template <typename octet_iterator, typename distance_type> +void advance (octet_iterator& it, distance_type n); + +
it
: a reference to an iterator pointing to the beginning of an UTF-8
encoded code point. After the function returns, it is incremented to point to the
nth following code point.
- n
: a positive integer that shows how many code points we want to
+ n
: a positive integer that shows how many code points we want to
advance.
Example of use:
-char* twochars = "\xe6\x97\xa5\xd1\x88"; -char* w = twochars; -unchecked::advance (w, 2); -assert (w == twochars + 5); +char* twochars = "\xe6\x97\xa5\xd1\x88"; +char* w = twochars; +unchecked::advance (w, 2); +assert (w == twochars + 5);
This function works only "forward". In case of a negative n
, there is
@@ -741,23 +880,29 @@ assert (w == twochars + 5);
Given the iterators to two UTF-8 encoded code points in a sequence, returns the
number of code points between them.
template <typename octet_iterator> typename
- std::iterator_traits<octet_iterator>::difference_type distance (octet_iterator
- first, octet_iterator last);
++template <typename octet_iterator> +typename std::iterator_traits<octet_iterator>::difference_type distance (octet_iterator first, octet_iterator last); +
first
: an iterator to a beginning of a UTF-8 encoded code point.
- last
: an iterator to a "post-end" of the last UTF-8 encoded code point
- in the sequence we are trying to determine the length. It can be the beginning of a
- new code point, or not.
- Return value the distance between the iterators, in code points.
+ last
: an iterator to a "post-end" of the last UTF-8 encoded code
+ point in the sequence we are trying to determine the length. It can be the
+ beginning of a new code point, or not.
+ Return value: the distance between the iterators,
+ in code points.
Example of use:
-char* twochars = "\xe6\x97\xa5\xd1\x88"; -size_t dist = utf8::unchecked::distance(twochars, twochars + 5); -assert (dist == 2); +char* twochars = "\xe6\x97\xa5\xd1\x88"; +size_t dist = utf8::unchecked::distance(twochars, twochars + 5); +assert (dist == 2);
This is a quicker but less safe version of utf8::distance
. It does not
@@ -769,26 +914,35 @@ assert (dist == 2);
Converts a UTF-16 encoded string to UTF-8.
-template <typename u16bit_iterator, typename octet_iterator>
- octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator
- result);
++template <typename u16bit_iterator, typename octet_iterator> +octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result); + +
start
: an iterator pointing to the beginning of the UTF-16 encoded
string to convert.
- end
: an iterator pointing to pass-the-end of the UTF-16 encoded string
- to convert.
- result
: an output iterator to the place in the UTF-8 string where to
- append the result of conversion. Return value: An iterator pointing to the
- place after the appended UTF-8 string.
+ end
: an iterator pointing to past-the-end of the UTF-16 encoded
+ string to convert.
+ result
: an output iterator to the place in the UTF-8 string where to
+ append the result of conversion.
+ Return value: An iterator pointing to the place
+ after the appended UTF-8 string.
Example of use:
-unsigned short utf16string[] = {0x41, 0x0448, 0x65e5, 0xd834, 0xdd1e}; -vector<unsigned char> utf8result; -unchecked::utf16to8(utf16string, utf16string + 5, back_inserter(utf8result)); -assert (utf8result.size() == 10); +unsigned short utf16string[] = {0x41, 0x0448, 0x65e5, 0xd834, 0xdd1e}; +vector<unsigned char> utf8result; +unchecked::utf16to8(utf16string, utf16string + 5, back_inserter(utf8result)); +assert (utf8result.size() == 10);
This is a quicker but less safe version of utf8::utf16to8
. It does not
@@ -800,28 +954,35 @@ assert (utf8result.size() == 10);
Converts an UTF-8 encoded string to UTF-16
-template <typename u16bit_iterator, typename octet_iterator>
- u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator
- result);
++template <typename u16bit_iterator, typename octet_iterator> +u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result); + +
start
: an iterator pointing to the beginning of the UTF-8 encoded
string to convert. <br /> end
: an iterator pointing to
past-the-end of the UTF-8 encoded string to convert.
- result
: an output iterator to the place in the UTF-16 string where to
+ result
: an output iterator to the place in the UTF-16 string where to
append the result of conversion.
- Return value: An iterator pointing to the place after the appended UTF-16
- string.
+ Return value: An iterator pointing to the place
+ after the appended UTF-16 string.
Example of use:
-char utf8_with_surrogates[] = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e"; -vector <unsigned short> utf16result; -unchecked::utf8to16(utf8_with_surrogates, utf8_with_surrogates + 9, back_inserter(utf16result)); -assert (utf16result.size() == 4); -assert (utf16result[2] == 0xd834); -assert (utf16result[3] == 0xdd1e); +char utf8_with_surrogates[] = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e"; +vector <unsigned short> utf16result; +unchecked::utf8to16(utf8_with_surrogates, utf8_with_surrogates + 9, back_inserter(utf16result)); +assert (utf16result.size() == 4); +assert (utf16result[2] == 0xd834); +assert (utf16result[3] == 0xdd1e);
This is a quicker but less safe version of utf8::utf8to16
. It does not
@@ -833,27 +994,34 @@ assert (utf16result[3] == 0xdd1e);
Converts a UTF-32 encoded string to UTF-8.
-template <typename octet_iterator, typename u32bit_iterator>
- octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator
- result);
++template <typename octet_iterator, typename u32bit_iterator> +octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result); + +
start
: an iterator pointing to the beginning of the UTF-32 encoded
string to convert.
- end
: an iterator pointing to pass-the-end of the UTF-32 encoded string
- to convert.
- result
: an output iterator to the place in the UTF-8 string where to
+ end
: an iterator pointing to past-the-end of the UTF-32 encoded
+ string to convert.
+ result
: an output iterator to the place in the UTF-8 string where to
append the result of conversion.
- Return value: An iterator pointing to the place after the appended UTF-8
- string.
+ Return value: An iterator pointing to the place
+ after the appended UTF-8 string.
Example of use:
-int utf32string[] = {0x448, 0x65E5, 0x10346, 0}; -vector<unsigned char> utf8result; -utf32to8(utf32string, utf32string + 3, back_inserter(utf8result)); -assert (utf8result.size() == 9); +int utf32string[] = {0x448, 0x65e5, 0x10346, 0}; +vector<unsigned char> utf8result; +utf32to8(utf32string, utf32string + 3, back_inserter(utf8result)); +assert (utf8result.size() == 9);
This is a quicker but less safe version of utf8::utf32to8
. It does not
@@ -865,27 +1033,32 @@ assert (utf8result.size() == 9);
Converts a UTF-8 encoded string to UTF-32.
-template <typename octet_iterator, typename u32bit_iterator>
- u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator
- result);
++template <typename octet_iterator, typename u32bit_iterator> +u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result); + +
start
: an iterator pointing to the beginning of the UTF-8 encoded
string to convert.
- end
: an iterator pointing to pass-the-end of the UTF-8 encoded string
+ end
: an iterator pointing to past-the-end of the UTF-8 encoded string
to convert.
- result
: an output iterator to the place in the UTF-32 string where to
+ result
: an output iterator to the place in the UTF-32 string where to
append the result of conversion.
- Return value: An iterator pointing to the place after the appended UTF-32
- string.
+ Return value: An iterator pointing to the place
+ after the appended UTF-32 string.
Example of use:
-char* twochars = "\xe6\x97\xa5\xd1\x88"; -vector<int> utf32result; -unchecked::utf8to32(twochars, twochars + 5, back_inserter(utf32result)); -assert (utf32result.size() == 2); +char* twochars = "\xe6\x97\xa5\xd1\x88"; +vector<int> utf32result; +unchecked::utf8to32(twochars, twochars + 5, back_inserter(utf32result)); +assert (utf32result.size() == 2);
This is a quicker but less safe version of utf8::utf8to32
. It does not
diff --git a/source/utf8/checked.h b/source/utf8/checked.h
index 980be27..4647016 100644
--- a/source/utf8/checked.h
+++ b/source/utf8/checked.h
@@ -29,7 +29,7 @@ DEALINGS IN THE SOFTWARE.
#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
#include "core.h"
-#include