diff --git a/v2_0/source/utf8/checked.h b/v2_0/source/utf8/checked.h index 5670c19..88fe73e 100644 --- a/v2_0/source/utf8/checked.h +++ b/v2_0/source/utf8/checked.h @@ -64,7 +64,7 @@ namespace utf8 }; /// The library API - functions intended to be called by the users - + template output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement) { @@ -72,7 +72,7 @@ namespace utf8 octet_iterator sequence_start = start; internal::utf_error err_code = internal::validate_next(start, end); switch (err_code) { - case internal::OK : + case internal::UTF8_OK : for (octet_iterator it = sequence_start; it != start; ++it) *out++ = *it; break; @@ -92,7 +92,7 @@ namespace utf8 ++start; break; } - } + } return out; } @@ -106,11 +106,11 @@ namespace utf8 template octet_iterator append(uint32_t cp, octet_iterator result) { - if (!internal::is_code_point_valid(cp)) + if (!internal::is_code_point_valid(cp)) throw invalid_code_point(cp); if (cp < 0x80) // one octet - *(result++) = static_cast(cp); + *(result++) = static_cast(cp); else if (cp < 0x800) { // two octets *(result++) = static_cast((cp >> 6) | 0xc0); *(result++) = static_cast((cp & 0x3f) | 0x80); @@ -138,7 +138,7 @@ namespace utf8 uint32_t cp = 0; internal::utf_error err_code = internal::validate_next(it, end, &cp); switch (err_code) { - case internal::OK : + case internal::UTF8_OK : break; case internal::NOT_ENOUGH_ROOM : throw not_enough_room(); @@ -149,7 +149,7 @@ namespace utf8 case internal::INVALID_CODE_POINT : throw invalid_code_point(cp); } - return cp; + return cp; } template @@ -162,7 +162,7 @@ namespace utf8 uint32_t prior(octet_iterator& it, octet_iterator start) { octet_iterator end = it; - while (internal::is_trail(*(--it))) + while (internal::is_trail(*(--it))) if (it < start) throw invalid_utf8(*it); // error - no lead byte in the sequence octet_iterator temp = it; @@ -174,7 +174,7 @@ namespace utf8 uint32_t previous(octet_iterator& it, octet_iterator pass_start) { octet_iterator end = it; - while (internal::is_trail(*(--it))) + while (internal::is_trail(*(--it))) if (it == pass_start) throw invalid_utf8(*it); // error - no lead byte in the sequence octet_iterator temp = it; @@ -193,14 +193,14 @@ namespace utf8 distance (octet_iterator first, octet_iterator last) { typename std::iterator_traits::difference_type dist; - for (dist = 0; first < last; ++dist) + for (dist = 0; first < last; ++dist) next(first, last); return dist; } template octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result) - { + { while (start != end) { uint32_t cp = internal::mask16(*start++); // Take care of surrogate pairs first @@ -208,17 +208,17 @@ namespace utf8 if (start != end) { uint32_t trail_surrogate = internal::mask16(*start++); if (trail_surrogate >= internal::TRAIL_SURROGATE_MIN && trail_surrogate <= internal::TRAIL_SURROGATE_MAX) - cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; - else + cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; + else throw invalid_utf16(static_cast(trail_surrogate)); } - else + else throw invalid_utf16(static_cast(*start)); - + } result = append(cp, result); } - return result; + return result; } template @@ -256,13 +256,13 @@ namespace utf8 // The iterator class template - class iterator : public std::iterator { + class iterator : public std::iterator { octet_iterator it; octet_iterator range_start; octet_iterator range_end; public: iterator () {}; - explicit iterator (const octet_iterator& octet_it, + explicit iterator (const octet_iterator& octet_it, const octet_iterator& range_start, const octet_iterator& range_end) : it(octet_it), range_start(range_start), range_end(range_end) @@ -277,8 +277,8 @@ namespace utf8 octet_iterator temp = it; return next(temp, range_end); } - bool operator == (const iterator& rhs) const - { + bool operator == (const iterator& rhs) const + { if (range_start != rhs.range_start || range_end != rhs.range_end) throw std::logic_error("Comparing utf-8 iterators defined with different ranges"); return (it == rhs.it); @@ -287,7 +287,7 @@ namespace utf8 { return !(operator == (rhs)); } - iterator& operator ++ () + iterator& operator ++ () { next(it, range_end); return *this; @@ -297,7 +297,7 @@ namespace utf8 iterator temp = *this; next(it, range_end); return temp; - } + } iterator& operator -- () { prior(it, range_start); diff --git a/v2_0/source/utf8/core.h b/v2_0/source/utf8/core.h index ca8c94a..8f27379 100644 --- a/v2_0/source/utf8/core.h +++ b/v2_0/source/utf8/core.h @@ -33,7 +33,7 @@ DEALINGS IN THE SOFTWARE. namespace utf8 { // The typedefs for 8-bit, 16-bit and 32-bit unsigned integers - // You may need to change them to match your system. + // You may need to change them to match your system. // These typedefs have the same names as ones from cstdint, or boost/cstdint typedef unsigned char uint8_t; typedef unsigned short uint16_t; @@ -41,7 +41,7 @@ namespace utf8 // Helper code - not intended to be directly called by the library users. May be changed at any time namespace internal -{ +{ // Unicode constants // Leading (high) surrogates: 0xd800 - 0xdbff // Trailing (low) surrogates: 0xdc00 - 0xdfff @@ -81,14 +81,14 @@ namespace internal inline bool is_code_point_valid(u32 cp) { return (cp <= CODE_POINT_MAX && !is_surrogate(cp) && cp != 0xfffe && cp != 0xffff); - } + } template inline typename std::iterator_traits::difference_type sequence_length(octet_iterator lead_it) { uint8_t lead = mask8(*lead_it); - if (lead < 0x80) + if (lead < 0x80) return 1; else if ((lead >> 5) == 0x6) return 2; @@ -96,11 +96,11 @@ namespace internal return 3; else if ((lead >> 3) == 0x1e) return 4; - else + else return 0; } - enum utf_error {OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT}; + enum utf_error {UTF8_OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT}; template utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t* code_point) @@ -116,23 +116,23 @@ namespace internal if (code_point) *code_point = cp; ++it; - return OK; + return UTF8_OK; } else return NOT_ENOUGH_ROOM; } - // Do we have enough memory? + // Do we have enough memory? if (std::distance(it, end) < length) return NOT_ENOUGH_ROOM; - + // Check trail octets and calculate the code point switch (length) { case 0: return INVALID_LEAD; break; case 2: - if (is_trail(*(++it))) { + if (is_trail(*(++it))) { cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f); } else { @@ -158,11 +158,11 @@ namespace internal break; case 4: if (is_trail(*(++it))) { - cp = ((cp << 18) & 0x1fffff) + ((mask8(*it) << 12) & 0x3ffff); + cp = ((cp << 18) & 0x1fffff) + ((mask8(*it) << 12) & 0x3ffff); if (is_trail(*(++it))) { cp += (mask8(*it) << 6) & 0xfff; if (is_trail(*(++it))) { - cp += (*it) & 0x3f; + cp += (*it) & 0x3f; } else { std::advance(it, -3); @@ -182,14 +182,14 @@ namespace internal } // Is the code point valid? if (!is_code_point_valid(cp)) { - for (octet_difference_type i = 0; i < length - 1; ++i) + for (octet_difference_type i = 0; i < length - 1; ++i) --it; return INVALID_CODE_POINT; } - + if (code_point) *code_point = cp; - + if (cp < 0x80) { if (length != 1) { std::advance(it, -(length-1)); @@ -208,9 +208,9 @@ namespace internal return OVERLONG_SEQUENCE; } } - + ++it; - return OK; + return UTF8_OK; } template @@ -218,12 +218,12 @@ namespace internal return validate_next(it, end, 0); } -} // namespace internal +} // namespace internal /// The library API - functions intended to be called by the users // Byte order mark - const uint8_t bom[] = {0xef, 0xbb, 0xbf}; + const uint8_t bom[] = {0xef, 0xbb, 0xbf}; template octet_iterator find_invalid(octet_iterator start, octet_iterator end) @@ -231,7 +231,7 @@ namespace internal octet_iterator result = start; while (result != end) { internal::utf_error err_code = internal::validate_next(result, end); - if (err_code != internal::OK) + if (err_code != internal::UTF8_OK) return result; } return result;