From 1976a0584c7eacecf857f64053211400e59201c2 Mon Sep 17 00:00:00 2001 From: ntrifunovic Date: Wed, 1 Jul 2009 11:55:37 +0000 Subject: [PATCH] Removing bidirectional restrictions for the octet_iterator git-svn-id: http://svn.code.sf.net/p/utfcpp/code@87 a809a056-fc17-0410-9590-b4f493f8b08e Conflicts: v2_0/test_drivers/performance/timer.h v2_0/test_drivers/performance/win32.cpp --- source/utf8/core.h | 25 +++++++++++++------------ test_drivers/negative/negative.cpp | 8 ++++---- test_drivers/performance/timer.h | 6 +++--- test_drivers/performance/win32.cpp | 15 +++++++-------- 4 files changed, 27 insertions(+), 27 deletions(-) diff --git a/source/utf8/core.h b/source/utf8/core.h index 8f27379..196f0b3 100644 --- a/source/utf8/core.h +++ b/source/utf8/core.h @@ -104,7 +104,8 @@ namespace internal template utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t* code_point) - { + { + octet_iterator original_it = it; uint32_t cp = mask8(*it); // Check the lead octet typedef typename std::iterator_traits::difference_type octet_difference_type; @@ -112,7 +113,7 @@ namespace internal // "Shortcut" for ASCII characters if (length == 1) { - if (end - it > 0) { + if (std::distance(it, end) > 0) { if (code_point) *code_point = cp; ++it; @@ -136,7 +137,7 @@ namespace internal cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f); } else { - --it; + it = original_it; return INCOMPLETE_SEQUENCE; } break; @@ -147,12 +148,12 @@ namespace internal cp += (*it) & 0x3f; } else { - std::advance(it, -2); + it = original_it; return INCOMPLETE_SEQUENCE; } } else { - --it; + it = original_it; return INCOMPLETE_SEQUENCE; } break; @@ -165,17 +166,17 @@ namespace internal cp += (*it) & 0x3f; } else { - std::advance(it, -3); + it = original_it; return INCOMPLETE_SEQUENCE; } } else { - std::advance(it, -2); + it = original_it; return INCOMPLETE_SEQUENCE; } } else { - --it; + it = original_it; return INCOMPLETE_SEQUENCE; } break; @@ -183,7 +184,7 @@ namespace internal // Is the code point valid? if (!is_code_point_valid(cp)) { for (octet_difference_type i = 0; i < length - 1; ++i) - --it; + it = original_it; return INVALID_CODE_POINT; } @@ -192,19 +193,19 @@ namespace internal if (cp < 0x80) { if (length != 1) { - std::advance(it, -(length-1)); + it = original_it; return OVERLONG_SEQUENCE; } } else if (cp < 0x800) { if (length != 2) { - std::advance(it, -(length-1)); + it = original_it; return OVERLONG_SEQUENCE; } } else if (cp < 0x10000) { if (length != 3) { - std::advance(it, -(length-1)); + it = original_it; return OVERLONG_SEQUENCE; } } diff --git a/test_drivers/negative/negative.cpp b/test_drivers/negative/negative.cpp index 8c910d1..554848a 100644 --- a/test_drivers/negative/negative.cpp +++ b/test_drivers/negative/negative.cpp @@ -7,8 +7,8 @@ using namespace utf8; #include using namespace std; -const char* TEST_FILE_PATH = "../../test_data/negative/utf8_invalid.txt"; -const unsigned INVALID_LINES[] = { 75, 76, 82, 83, 84, 85, 93, 102, 103, 105, 106, 107, 108, 109, 110, 114, 115, 116, 117, 124, 125, 130, 135, 140, 145, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 169, 175, 176, 177, 207, 208, 209, 210, 211, 220, 221, 222, 223, 224, 232, 233, 234, 235, 236, 247, 248, 249, 250, 251, 252, 253, 257, 258, 259, 260, 261, 262, 263, 264, 268, 269}; +const char* TEST_FILE_PATH = "../../../test_data/negative/utf8_invalid.txt"; +const unsigned INVALID_LINES[] = { 75, 76, 82, 83, 84, 85, 93, 102, 103, 105, 106, 107, 108, 109, 110, 114, 115, 116, 117, 124, 125, 130, 135, 140, 145, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 169, 175, 176, 177, 207, 208, 209, 210, 211, 220, 221, 222, 223, 224, 232, 233, 234, 235, 236, 247, 248, 249, 250, 251, 252, 253, 257, 258, 259, 260, 261, 262, 263, 264, 268, 269}; const unsigned* INVALID_LINES_END = INVALID_LINES + sizeof(INVALID_LINES)/sizeof(unsigned); int main() @@ -25,14 +25,14 @@ int main() char byte; while (!fs8.eof()) { string line; - while ((byte = static_cast(fs8.get())) != '\n' && !fs8.eof()) + while ((byte = static_cast(fs8.get())) != '\n' && !fs8.eof()) line.push_back(byte); line_count++; // Print out lines that contain invalid UTF-8 if (!is_valid(line.begin(), line.end())) { const unsigned* u = find(INVALID_LINES, INVALID_LINES_END, line_count); - if (u == INVALID_LINES_END) + if (u == INVALID_LINES_END) cout << "Unexpected invalid utf-8 at line " << line_count << '\n'; // try fixing it: diff --git a/test_drivers/performance/timer.h b/test_drivers/performance/timer.h index 170dea3..88f2141 100644 --- a/test_drivers/performance/timer.h +++ b/test_drivers/performance/timer.h @@ -8,8 +8,8 @@ struct timer { using namespace std; end = clock(); unsigned milliseconds = (end - start)*1000 / CLOCKS_PER_SEC; - report << "Spent " << milliseconds << "ms here\n"; - } + report << "Spent " << milliseconds << "ms here\n"; + } std::clock_t start; std::clock_t end; @@ -17,5 +17,5 @@ struct timer { private: // just to surpress a VC++ 8.0 warning - timer& operator = (const timer&) {}; + timer& operator = (const timer&); }; diff --git a/test_drivers/performance/win32.cpp b/test_drivers/performance/win32.cpp index 9577920..c285ae7 100644 --- a/test_drivers/performance/win32.cpp +++ b/test_drivers/performance/win32.cpp @@ -53,30 +53,29 @@ int main(int argc, char** argv) timer t(cout); utf8::unchecked::utf8to16(buf, buf + length, utf16buf); } - // the UTF-16 result will not be larger than this (I hope :) ) wchar_t* utf16iconvbuf = new wchar_t[wlength]; { memset (utf16iconvbuf, 0 , wlength * sizeof(wchar_t)); // win32 cout << "win32: "; - + { timer t(cout); MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, buf, length, utf16iconvbuf, int(wlength)); } - + } // just check the correctness while we are here: - if (!equal(utf16buf, utf16buf + wlength, utf16iconvbuf)) + if (!equal(utf16buf, utf16buf + wlength, utf16iconvbuf)) cout << "Different result!!!"; - + // the other way around cout << "UTF16 to UTF-8\n"; { //win32 - memset(buf, 0, length); + memset(buf, 0, length); cout<< "win32: "; { @@ -92,14 +91,14 @@ int main(int argc, char** argv) timer t(cout); utf8::unchecked::utf16to8(utf16buf, utf16buf + wlength, buf); } - + { memset (buf, 0 , length); cout << "utf16to8: "; timer t(cout); utf8::utf16to8(utf16buf, utf16buf + wlength, buf); } - + delete [] buf; delete [] utf16buf; }