From 0b6b8b5cf8c9c55db96ae1c5e0ef625d5ad90e02 Mon Sep 17 00:00:00 2001 From: ntrifunovic Date: Sat, 26 Sep 2009 01:41:24 +0000 Subject: [PATCH] Fix for bug ID: 2852872 [invalid utf16 strings were parsed without any error] git-svn-id: http://svn.code.sf.net/p/utfcpp/code@100 a809a056-fc17-0410-9590-b4f493f8b08e --- source/utf8/checked.h | 8 ++++++-- source/utf8/core.h | 12 ++++++++++++ source/utf8/unchecked.h | 2 +- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/source/utf8/checked.h b/source/utf8/checked.h index 16bab4d..ecac556 100644 --- a/source/utf8/checked.h +++ b/source/utf8/checked.h @@ -204,10 +204,10 @@ namespace utf8 while (start != end) { uint32_t cp = internal::mask16(*start++); // Take care of surrogate pairs first - if (internal::is_surrogate(cp)) { + if (internal::is_lead_surrogate(cp)) { if (start != end) { uint32_t trail_surrogate = internal::mask16(*start++); - if (trail_surrogate >= internal::TRAIL_SURROGATE_MIN && trail_surrogate <= internal::TRAIL_SURROGATE_MAX) + if (internal::is_trail_surrogate(trail_surrogate)) cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; else throw invalid_utf16(static_cast<uint16_t>(trail_surrogate)); @@ -216,6 +216,10 @@ namespace utf8 throw invalid_utf16(static_cast<uint16_t>(*start)); } + // Lone trail surrogate + else if (internal::is_trail_surrogate(cp)) + throw invalid_utf16(static_cast<uint16_t>(cp)); + result = append(cp, result); } return result; diff --git a/source/utf8/core.h b/source/utf8/core.h index 557ab0f..5a55f06 100644 --- a/source/utf8/core.h +++ b/source/utf8/core.h @@ -71,6 +71,18 @@ namespace internal return ((mask8(oc) >> 6) == 0x2); } + template <typename u16> + inline bool is_lead_surrogate(u16 cp) + { + return (cp >= LEAD_SURROGATE_MIN && cp <= LEAD_SURROGATE_MAX); + } + + template <typename u16> + inline bool is_trail_surrogate(u16 cp) + { + return (cp >= TRAIL_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX); + } + template <typename u16> inline bool is_surrogate(u16 cp) { diff --git a/source/utf8/unchecked.h b/source/utf8/unchecked.h 
index 84207a5..d3110cb 100644 --- a/source/utf8/unchecked.h +++ b/source/utf8/unchecked.h @@ -132,7 +132,7 @@ namespace utf8 while (start != end) { uint32_t cp = internal::mask16(*start++); // Take care of surrogate pairs first - if (internal::is_surrogate(cp)) { + if (internal::is_lead_surrogate(cp)) { uint32_t trail_surrogate = internal::mask16(*start++); cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; }