Updated the documentation and a test to include peek_next()

git-svn-id: http://svn.code.sf.net/p/utfcpp/code@83 a809a056-fc17-0410-9590-b4f493f8b08e
2007-10-27 23:34:59 +00:00 · 2007-10-27 23:34:59 +00:00 · 4d7ad9b625
commit 4d7ad9b625
parent 3a04fda526
2 changed files with 82 additions and 2 deletions
--- a/doc/utf8cpp.html
+++ b/doc/utf8cpp.html
@ -294,6 +294,46 @@ assert (w == twochars + <span class="literal">3</span>);
    <p>
      This function is typically used to iterate through a UTF-8 encoded string.
    </p>
+    <p>
+      In case of an invalid UTF-8 sequence, a <code>utf8::invalid_utf8</code> exception is
+      thrown.
+    </p>
+    <h4>
+      utf8::peek_next
+    </h4>
+    <p class="version">
+    Available in version 2.1 and later.
+    </p>
+    <p>
+      Given an iterator to the beginning of a UTF-8 sequence, it returns the code
+      point of that sequence without changing the value of the iterator.
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> octet_iterator&gt; 
+uint32_t peek_next(octet_iterator it, octet_iterator end);
+   
+</pre>
+    <p>
+      <code>it</code>: an iterator pointing to the beginning of a UTF-8
+      encoded code point.<br>
+       <code>end</code>: end of the UTF-8 sequence to be processed. If <code>it</code>
+      becomes equal to <code>end</code> during the extraction of a code point, a
+      <code>utf8::not_enough_room</code> exception is thrown.<br>
+       <span class="return_value">Return value</span>: the 32 bit representation of the
+      processed UTF-8 code point.
+    </p>
+    <p>
+      Example of use:
+    </p>
+<pre>
+<span class="keyword">char</span>* twochars = <span class=
+"literal">"\xe6\x97\xa5\xd1\x88"</span>;
+<span class="keyword">char</span>* w = twochars;
+<span class="keyword">int</span> cp = peek_next(w, twochars + <span class="literal">6</span>);
+assert (cp == <span class="literal">0x65e5</span>);
+assert (w == twochars);
+</pre>
    <p>
      In case of an invalid UTF-8 seqence, a <code>utf8::invalid_utf8</code> exception is
      thrown.
@ -1011,6 +1051,42 @@ assert (w == twochars + <span class="literal">3</span>);
      This is a faster but less safe version of <code>utf8::next</code>. It does not
      check for validity of the supplied UTF-8 sequence.
    </p>
+    <h4>
+      utf8::unchecked::peek_next
+    </h4>
+    <p class="version">
+    Available in version 2.1 and later.
+    </p>
+    <p>
+      Given an iterator to the beginning of a UTF-8 sequence, it returns its code point without changing the iterator.
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> octet_iterator&gt;
+uint32_t peek_next(octet_iterator it);
+   
+</pre>
+    <p>
+      <code>it</code>: an iterator pointing to the beginning of a UTF-8
+      encoded code point.<br>
+       <span class="return_value">Return value</span>: the 32 bit representation of the
+      processed UTF-8 code point.
+    </p>
+    <p>
+      Example of use:
+    </p>
+<pre>
+<span class="keyword">char</span>* twochars = <span class=
+"literal">"\xe6\x97\xa5\xd1\x88"</span>;
+<span class="keyword">char</span>* w = twochars;
+<span class="keyword">int</span> cp = unchecked::peek_next(w);
+assert (cp == <span class="literal">0x65e5</span>);
+assert (w == twochars);
+</pre>
+    <p>
+      This is a faster but less safe version of <code>utf8::peek_next</code>. It does not
+      check for validity of the supplied UTF-8 sequence.
+    </p>
    <h4>
      utf8::unchecked::prior
    </h4>
--- a/test_drivers/utf8reader/utf8reader.cpp
+++ b/test_drivers/utf8reader/utf8reader.cpp
@ -59,7 +59,9 @@ int main(int argc, char** argv)
        unsigned char_count = 0;
        string::iterator it = line_start;
        while (it != line_end) {
-            next(it, line_end);
+            unsigned int next_cp = peek_next(it, line_end);
+            if (next(it, line_end) != next_cp)
+                cout << "Line " << line_count << ": Error: peek_next gave a different result than next" << '\n';
            char_count++;
        }
        if (char_count != utf32_line.size())
@ -121,7 +123,9 @@ int main(int argc, char** argv)
        char_count = 0;
        it = line_start;
        while (it != line_end) {
-            unchecked::next(it);
+            unsigned int next_cp = unchecked::peek_next(it); 
+            if (unchecked::next(it) != next_cp)
+              cout << "Line " << line_count << ": Error: unchecked::peek_next gave a different result than unchecked::next" << '\n';;
            char_count++;
        }
        if (char_count != utf32_line.size())