Removing bidirectional restrictions for the octet_iterator

git-svn-id: http://svn.code.sf.net/p/utfcpp/code@87 a809a056-fc17-0410-9590-b4f493f8b08e
2009-07-01 11:55:37 +00:00 · 2009-07-01 11:55:37 +00:00 · 5748eeff08
commit 5748eeff08
parent dacd49dde9
4 changed files with 27 additions and 27 deletions
--- a/v2_0/source/utf8/core.h
+++ b/v2_0/source/utf8/core.h
@@ -104,7 +104,8 @@ namespace internal

    template <typename octet_iterator>
    utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t* code_point)
-    {
+    {
+        octet_iterator original_it = it;
        uint32_t cp = mask8(*it);
        // Check the lead octet
        typedef typename std::iterator_traits<octet_iterator>::difference_type octet_difference_type;
@@ -112,7 +113,7 @@ namespace internal

        // "Shortcut" for ASCII characters
        if (length == 1) {
-            if (end - it > 0) {
+            if (std::distance(it, end) > 0) {
                if (code_point)
                    *code_point = cp;
                ++it;
@@ -136,7 +137,7 @@ namespace internal
                    cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f);
                }
                else {
-                    --it;
+                    it = original_it;
                    return INCOMPLETE_SEQUENCE;
                }
            break;
@@ -147,12 +148,12 @@ namespace internal
                        cp += (*it) & 0x3f;
                    }
                    else {
-                        std::advance(it, -2);
+                        it = original_it;
                        return INCOMPLETE_SEQUENCE;
                    }
                }
                else {
-                    --it;
+                    it = original_it;
                    return INCOMPLETE_SEQUENCE;
                }
            break;
@@ -165,17 +166,17 @@ namespace internal
                            cp += (*it) & 0x3f;
                        }
                        else {
-                            std::advance(it, -3);
+                            it = original_it;
                            return INCOMPLETE_SEQUENCE;
                        }
                    }
                    else {
-                        std::advance(it, -2);
+                        it = original_it;
                        return INCOMPLETE_SEQUENCE;
                    }
                }
                else {
-                    --it;
+                    it = original_it;
                    return INCOMPLETE_SEQUENCE;
                }
            break;
@@ -183,7 +184,7 @@ namespace internal
        // Is the code point valid?
        if (!is_code_point_valid(cp)) {
            for (octet_difference_type i = 0; i < length - 1; ++i)
-                --it;
+                it = original_it;
            return INVALID_CODE_POINT;
        }

@@ -192,19 +193,19 @@ namespace internal

        if (cp < 0x80) {
            if (length != 1) {
-                std::advance(it, -(length-1));
+                it = original_it;
                return OVERLONG_SEQUENCE;
            }
        }
        else if (cp < 0x800) {
            if (length != 2) {
-                std::advance(it, -(length-1));
+                it = original_it;
                return OVERLONG_SEQUENCE;
            }
        }
        else if (cp < 0x10000) {
            if (length != 3) {
-                std::advance(it, -(length-1));
+                it = original_it;
                return OVERLONG_SEQUENCE;
            }
        }
--- a/v2_0/test_drivers/negative/negative.cpp
+++ b/v2_0/test_drivers/negative/negative.cpp
@@ -7,8 +7,8 @@ using namespace utf8;
 #include <algorithm>
 using namespace std;

-const char* TEST_FILE_PATH = "../../test_data/negative/utf8_invalid.txt";
-const unsigned INVALID_LINES[] = { 75, 76, 82, 83, 84, 85, 93, 102, 103, 105, 106, 107, 108, 109, 110, 114, 115, 116, 117, 124, 125, 130, 135, 140, 145, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 169, 175, 176, 177, 207, 208, 209, 210, 211, 220, 221, 222, 223, 224, 232, 233, 234, 235, 236, 247, 248, 249, 250, 251, 252, 253, 257, 258, 259, 260, 261, 262, 263, 264, 268, 269}; 
+const char* TEST_FILE_PATH = "../../../test_data/negative/utf8_invalid.txt";
+const unsigned INVALID_LINES[] = { 75, 76, 82, 83, 84, 85, 93, 102, 103, 105, 106, 107, 108, 109, 110, 114, 115, 116, 117, 124, 125, 130, 135, 140, 145, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 169, 175, 176, 177, 207, 208, 209, 210, 211, 220, 221, 222, 223, 224, 232, 233, 234, 235, 236, 247, 248, 249, 250, 251, 252, 253, 257, 258, 259, 260, 261, 262, 263, 264, 268, 269};
 const unsigned* INVALID_LINES_END = INVALID_LINES + sizeof(INVALID_LINES)/sizeof(unsigned);

 int main()
@@ -25,14 +25,14 @@ int main()
    char byte;
    while (!fs8.eof()) {
        string line;
-        while ((byte = static_cast<char>(fs8.get())) != '\n' && !fs8.eof()) 
+        while ((byte = static_cast<char>(fs8.get())) != '\n' && !fs8.eof())
 	    line.push_back(byte);

        line_count++;
 	// Print out lines that contain invalid UTF-8
 	if (!is_valid(line.begin(), line.end())) {
 	   const unsigned* u = find(INVALID_LINES, INVALID_LINES_END, line_count);
-	   if (u == INVALID_LINES_END) 
+	   if (u == INVALID_LINES_END)
 	       cout << "Unexpected invalid utf-8 at line " << line_count << '\n';

           // try fixing it:
--- a/v2_0/test_drivers/performance/timer.h
+++ b/v2_0/test_drivers/performance/timer.h
@@ -8,8 +8,8 @@ struct timer {
          using namespace std;
          end = clock();
          unsigned milliseconds = (end - start)*1000 / CLOCKS_PER_SEC;
-          report << "Spent " << milliseconds << "ms here\n";  
-       } 
+          report << "Spent " << milliseconds << "ms here\n";
+       }

    std::clock_t start;
    std::clock_t end;
@@ -17,5 +17,5 @@ struct timer {

 private:
    // just to surpress a VC++ 8.0 warning
-    timer& operator = (const timer&) {};
+    timer& operator = (const timer&);
 };
--- a/v2_0/test_drivers/performance/win32.cpp
+++ b/v2_0/test_drivers/performance/win32.cpp
@@ -53,30 +53,29 @@ int main(int argc, char** argv)
        timer t(cout);
        utf8::unchecked::utf8to16(buf, buf + length, utf16buf);
    }
-
    // the UTF-16 result will not be larger than this (I hope :) )
    wchar_t* utf16iconvbuf = new wchar_t[wlength];
    {
        memset (utf16iconvbuf, 0 , wlength * sizeof(wchar_t));
        // win32
        cout << "win32: ";
-        
+
        {
            timer t(cout);
            MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, buf, length, utf16iconvbuf, int(wlength));
        }
-        
+
    }

    // just check the correctness while we are here:
-    if (!equal(utf16buf, utf16buf + wlength, utf16iconvbuf)) 
+    if (!equal(utf16buf, utf16buf + wlength, utf16iconvbuf))
        cout << "Different result!!!";
-    
+
    // the other way around
    cout << "UTF16 to UTF-8\n";
    {
        //win32
-        memset(buf, 0, length);    
+        memset(buf, 0, length);
        cout<< "win32: ";

        {
@@ -92,14 +91,14 @@ int main(int argc, char** argv)
        timer t(cout);
        utf8::unchecked::utf16to8(utf16buf, utf16buf + wlength, buf);
    }
-    
+
    {
        memset (buf, 0 , length);
        cout << "utf16to8: ";
        timer t(cout);
        utf8::utf16to8(utf16buf, utf16buf + wlength, buf);
    }
-    
+
    delete [] buf;
    delete [] utf16buf;
 }