Fix for the bug [ 1528369 ] utf8::find_invalid does not return the start of a sequence
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@21 a809a056-fc17-0410-9590-b4f493f8b08e
This commit is contained in:
parent
db36241cc1
commit
abc6fd5428
1 changed files with 34 additions and 11 deletions
|
@ -138,8 +138,10 @@ namespace internal
|
||||||
if (is_trail(*(++it))) {
|
if (is_trail(*(++it))) {
|
||||||
cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f);
|
cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f);
|
||||||
}
|
}
|
||||||
else
|
else {
|
||||||
|
--it;
|
||||||
return INCOMPLETE_SEQUENCE;
|
return INCOMPLETE_SEQUENCE;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
if (is_trail(*(++it))) {
|
if (is_trail(*(++it))) {
|
||||||
|
@ -147,11 +149,15 @@ namespace internal
|
||||||
if (is_trail(*(++it))) {
|
if (is_trail(*(++it))) {
|
||||||
cp += (*it) & 0x3f;
|
cp += (*it) & 0x3f;
|
||||||
}
|
}
|
||||||
else
|
else {
|
||||||
|
--it; --it;
|
||||||
return INCOMPLETE_SEQUENCE;
|
return INCOMPLETE_SEQUENCE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else {
|
||||||
|
--it;
|
||||||
return INCOMPLETE_SEQUENCE;
|
return INCOMPLETE_SEQUENCE;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
if (is_trail(*(++it))) {
|
if (is_trail(*(++it))) {
|
||||||
|
@ -161,35 +167,52 @@ namespace internal
|
||||||
if (is_trail(*(++it))) {
|
if (is_trail(*(++it))) {
|
||||||
cp += (*it) & 0x3f;
|
cp += (*it) & 0x3f;
|
||||||
}
|
}
|
||||||
else
|
else {
|
||||||
|
--it; --it; --it;
|
||||||
return INCOMPLETE_SEQUENCE;
|
return INCOMPLETE_SEQUENCE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else {
|
||||||
|
--it; --it;
|
||||||
return INCOMPLETE_SEQUENCE;
|
return INCOMPLETE_SEQUENCE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else {
|
||||||
|
--it;
|
||||||
return INCOMPLETE_SEQUENCE;
|
return INCOMPLETE_SEQUENCE;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
// Is the code point valid?
|
// Is the code point valid?
|
||||||
if (cp > CODE_POINT_MAX || is_surrogate(cp) || cp == 0xfffe || cp == 0xffff)
|
if (cp > CODE_POINT_MAX || is_surrogate(cp) || cp == 0xfffe || cp == 0xffff) {
|
||||||
|
for (size_t i = 0; i < sequence_length - 1; ++i)
|
||||||
|
--it;
|
||||||
return INVALID_CODE_POINT;
|
return INVALID_CODE_POINT;
|
||||||
|
}
|
||||||
|
|
||||||
if (code_point)
|
if (code_point)
|
||||||
*code_point = cp;
|
*code_point = cp;
|
||||||
|
|
||||||
// Overlong sequence?
|
|
||||||
if (cp < 0x80) {
|
if (cp < 0x80) {
|
||||||
if (sequence_length != 1)
|
if (sequence_length != 1) {
|
||||||
|
for (size_t i = 0; i < sequence_length - 1; ++i)
|
||||||
|
--it;
|
||||||
return OVERLONG_SEQUENCE;
|
return OVERLONG_SEQUENCE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if (cp < 0x800) {
|
else if (cp < 0x800) {
|
||||||
if (sequence_length != 2)
|
if (sequence_length != 2) {
|
||||||
|
for (size_t i = 0; i < sequence_length - 1; ++i)
|
||||||
|
--it;
|
||||||
return OVERLONG_SEQUENCE;
|
return OVERLONG_SEQUENCE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if (cp < 0x10000) {
|
else if (cp < 0x10000) {
|
||||||
if (sequence_length != 3)
|
if (sequence_length != 3) {
|
||||||
|
for (size_t i = 0; i < sequence_length - 1; ++i)
|
||||||
|
--it;
|
||||||
return OVERLONG_SEQUENCE;
|
return OVERLONG_SEQUENCE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
++it;
|
++it;
|
||||||
|
|
Loading…
Add table
Reference in a new issue