diff --git a/source/utf8/checked.h b/source/utf8/checked.h
index 16bab4d..ecac556 100644
--- a/source/utf8/checked.h
+++ b/source/utf8/checked.h
@@ -204,10 +204,10 @@ namespace utf8
         while (start != end) {
             uint32_t cp = internal::mask16(*start++);
             // Take care of surrogate pairs first
-            if (internal::is_surrogate(cp)) {
+            if (internal::is_lead_surrogate(cp)) {
                 if (start != end) {
                     uint32_t trail_surrogate = internal::mask16(*start++);
-                    if (trail_surrogate >= internal::TRAIL_SURROGATE_MIN && trail_surrogate <= internal::TRAIL_SURROGATE_MAX)
+                    if (internal::is_trail_surrogate(trail_surrogate))
                         cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
                     else
                         throw invalid_utf16(static_cast<uint16_t>(trail_surrogate));
@@ -216,6 +216,10 @@ namespace utf8
                     throw invalid_utf16(static_cast<uint16_t>(*start));
 
             }
+            // Lone trail surrogate
+            else if (internal::is_trail_surrogate(cp))
+                throw invalid_utf16(static_cast<uint16_t>(cp));
+
             result = append(cp, result);
         }
         return result;
diff --git a/source/utf8/core.h b/source/utf8/core.h
index 557ab0f..5a55f06 100644
--- a/source/utf8/core.h
+++ b/source/utf8/core.h
@@ -71,6 +71,18 @@ namespace internal
         return ((mask8(oc) >> 6) == 0x2);
     }
 
+    template <typename u16>
+    inline bool is_lead_surrogate(u16 cp)
+    {
+        return (cp >= LEAD_SURROGATE_MIN && cp <= LEAD_SURROGATE_MAX);
+    }
+
+    template <typename u16>
+    inline bool is_trail_surrogate(u16 cp)
+    {
+        return (cp >= TRAIL_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
+    }
+
     template <typename u16>
     inline bool is_surrogate(u16 cp)
     {
diff --git a/source/utf8/unchecked.h b/source/utf8/unchecked.h
index 84207a5..d3110cb 100644
--- a/source/utf8/unchecked.h
+++ b/source/utf8/unchecked.h
@@ -132,7 +132,7 @@ namespace utf8
         while (start != end) {
             uint32_t cp = internal::mask16(*start++);
             // Take care of surrogate pairs first
-            if (internal::is_surrogate(cp)) {
+            if (internal::is_lead_surrogate(cp)) {
                 uint32_t trail_surrogate = internal::mask16(*start++);
                 cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
             }