Fix for bug ID: 2852872 [invalid utf16 strings were parsed without any error]

git-svn-id: http://svn.code.sf.net/p/utfcpp/code@100 a809a056-fc17-0410-9590-b4f493f8b08e
This commit is contained in:
ntrifunovic 2009-09-26 01:41:24 +00:00
parent 6c3aa1f33e
commit f37a772149
3 changed files with 19 additions and 3 deletions

View file

@ -204,10 +204,10 @@ namespace utf8
while (start != end) {
uint32_t cp = internal::mask16(*start++);
// Take care of surrogate pairs first
if (internal::is_surrogate(cp)) {
if (internal::is_lead_surrogate(cp)) {
if (start != end) {
uint32_t trail_surrogate = internal::mask16(*start++);
if (trail_surrogate >= internal::TRAIL_SURROGATE_MIN && trail_surrogate <= internal::TRAIL_SURROGATE_MAX)
if (internal::is_trail_surrogate(trail_surrogate))
cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
else
throw invalid_utf16(static_cast<uint16_t>(trail_surrogate));
@ -216,6 +216,10 @@ namespace utf8
throw invalid_utf16(static_cast<uint16_t>(*start));
}
// Lone trail surrogate
else if (internal::is_trail_surrogate(cp))
throw invalid_utf16(static_cast<uint16_t>(cp));
result = append(cp, result);
}
return result;

View file

@ -71,6 +71,18 @@ namespace internal
return ((mask8(oc) >> 6) == 0x2);
}
// Reports whether the code unit `cp` is a lead (first-of-pair) surrogate,
// i.e. lies in the inclusive range [LEAD_SURROGATE_MIN, LEAD_SURROGATE_MAX].
template <typename u16>
inline bool is_lead_surrogate(u16 cp)
{
    // Anything below the lower bound cannot be a lead surrogate.
    if (cp < LEAD_SURROGATE_MIN)
        return false;
    return cp <= LEAD_SURROGATE_MAX;
}
// Reports whether the code unit `cp` is a trail (second-of-pair) surrogate,
// i.e. lies in the inclusive range [TRAIL_SURROGATE_MIN, TRAIL_SURROGATE_MAX].
template <typename u16>
inline bool is_trail_surrogate(u16 cp)
{
    // Anything below the lower bound cannot be a trail surrogate.
    if (cp < TRAIL_SURROGATE_MIN)
        return false;
    return cp <= TRAIL_SURROGATE_MAX;
}
template <typename u16>
inline bool is_surrogate(u16 cp)
{

View file

@ -132,7 +132,7 @@ namespace utf8
while (start != end) {
uint32_t cp = internal::mask16(*start++);
// Take care of surrogate pairs first
if (internal::is_surrogate(cp)) {
if (internal::is_lead_surrogate(cp)) {
uint32_t trail_surrogate = internal::mask16(*start++);
cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
}