Removing bidirectional restrictions for the octet_iterator

git-svn-id: http://svn.code.sf.net/p/utfcpp/code@87 a809a056-fc17-0410-9590-b4f493f8b08e
This commit is contained in:
ntrifunovic 2009-07-01 11:55:37 +00:00
parent dacd49dde9
commit 5748eeff08
4 changed files with 27 additions and 27 deletions

View file

@ -104,7 +104,8 @@ namespace internal
template <typename octet_iterator>
utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t* code_point)
{
{
octet_iterator original_it = it;
uint32_t cp = mask8(*it);
// Check the lead octet
typedef typename std::iterator_traits<octet_iterator>::difference_type octet_difference_type;
@ -112,7 +113,7 @@ namespace internal
// "Shortcut" for ASCII characters
if (length == 1) {
if (end - it > 0) {
if (std::distance(it, end) > 0) {
if (code_point)
*code_point = cp;
++it;
@ -136,7 +137,7 @@ namespace internal
cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f);
}
else {
--it;
it = original_it;
return INCOMPLETE_SEQUENCE;
}
break;
@ -147,12 +148,12 @@ namespace internal
cp += (*it) & 0x3f;
}
else {
std::advance(it, -2);
it = original_it;
return INCOMPLETE_SEQUENCE;
}
}
else {
--it;
it = original_it;
return INCOMPLETE_SEQUENCE;
}
break;
@ -165,17 +166,17 @@ namespace internal
cp += (*it) & 0x3f;
}
else {
std::advance(it, -3);
it = original_it;
return INCOMPLETE_SEQUENCE;
}
}
else {
std::advance(it, -2);
it = original_it;
return INCOMPLETE_SEQUENCE;
}
}
else {
--it;
it = original_it;
return INCOMPLETE_SEQUENCE;
}
break;
@ -183,7 +184,7 @@ namespace internal
// Is the code point valid?
if (!is_code_point_valid(cp)) {
for (octet_difference_type i = 0; i < length - 1; ++i)
--it;
it = original_it;
return INVALID_CODE_POINT;
}
@ -192,19 +193,19 @@ namespace internal
if (cp < 0x80) {
if (length != 1) {
std::advance(it, -(length-1));
it = original_it;
return OVERLONG_SEQUENCE;
}
}
else if (cp < 0x800) {
if (length != 2) {
std::advance(it, -(length-1));
it = original_it;
return OVERLONG_SEQUENCE;
}
}
else if (cp < 0x10000) {
if (length != 3) {
std::advance(it, -(length-1));
it = original_it;
return OVERLONG_SEQUENCE;
}
}

View file

@ -7,8 +7,8 @@ using namespace utf8;
#include <algorithm>
using namespace std;
const char* TEST_FILE_PATH = "../../test_data/negative/utf8_invalid.txt";
const unsigned INVALID_LINES[] = { 75, 76, 82, 83, 84, 85, 93, 102, 103, 105, 106, 107, 108, 109, 110, 114, 115, 116, 117, 124, 125, 130, 135, 140, 145, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 169, 175, 176, 177, 207, 208, 209, 210, 211, 220, 221, 222, 223, 224, 232, 233, 234, 235, 236, 247, 248, 249, 250, 251, 252, 253, 257, 258, 259, 260, 261, 262, 263, 264, 268, 269};
const char* TEST_FILE_PATH = "../../../test_data/negative/utf8_invalid.txt";
const unsigned INVALID_LINES[] = { 75, 76, 82, 83, 84, 85, 93, 102, 103, 105, 106, 107, 108, 109, 110, 114, 115, 116, 117, 124, 125, 130, 135, 140, 145, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 169, 175, 176, 177, 207, 208, 209, 210, 211, 220, 221, 222, 223, 224, 232, 233, 234, 235, 236, 247, 248, 249, 250, 251, 252, 253, 257, 258, 259, 260, 261, 262, 263, 264, 268, 269};
const unsigned* INVALID_LINES_END = INVALID_LINES + sizeof(INVALID_LINES)/sizeof(unsigned);
int main()
@ -25,14 +25,14 @@ int main()
char byte;
while (!fs8.eof()) {
string line;
while ((byte = static_cast<char>(fs8.get())) != '\n' && !fs8.eof())
while ((byte = static_cast<char>(fs8.get())) != '\n' && !fs8.eof())
line.push_back(byte);
line_count++;
// Print out lines that contain invalid UTF-8
if (!is_valid(line.begin(), line.end())) {
const unsigned* u = find(INVALID_LINES, INVALID_LINES_END, line_count);
if (u == INVALID_LINES_END)
if (u == INVALID_LINES_END)
cout << "Unexpected invalid utf-8 at line " << line_count << '\n';
// try fixing it:

View file

@ -8,8 +8,8 @@ struct timer {
using namespace std;
end = clock();
unsigned milliseconds = (end - start)*1000 / CLOCKS_PER_SEC;
report << "Spent " << milliseconds << "ms here\n";
}
report << "Spent " << milliseconds << "ms here\n";
}
std::clock_t start;
std::clock_t end;
@ -17,5 +17,5 @@ struct timer {
private:
// just to suppress a VC++ 8.0 warning
timer& operator = (const timer&) {};
timer& operator = (const timer&);
};

View file

@ -53,30 +53,29 @@ int main(int argc, char** argv)
timer t(cout);
utf8::unchecked::utf8to16(buf, buf + length, utf16buf);
}
// the UTF-16 result will not be larger than this (I hope :) )
wchar_t* utf16iconvbuf = new wchar_t[wlength];
{
memset (utf16iconvbuf, 0 , wlength * sizeof(wchar_t));
// win32
cout << "win32: ";
{
timer t(cout);
MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, buf, length, utf16iconvbuf, int(wlength));
}
}
// just check the correctness while we are here:
if (!equal(utf16buf, utf16buf + wlength, utf16iconvbuf))
if (!equal(utf16buf, utf16buf + wlength, utf16iconvbuf))
cout << "Different result!!!";
// the other way around
cout << "UTF16 to UTF-8\n";
{
//win32
memset(buf, 0, length);
memset(buf, 0, length);
cout<< "win32: ";
{
@ -92,14 +91,14 @@ int main(int argc, char** argv)
timer t(cout);
utf8::unchecked::utf16to8(utf16buf, utf16buf + wlength, buf);
}
{
memset (buf, 0 , length);
cout << "utf16to8: ";
timer t(cout);
utf8::utf16to8(utf16buf, utf16buf + wlength, buf);
}
delete [] buf;
delete [] utf16buf;
}