From 4d7ad9b625424468e33ad1c9a74dacbbeab79d14 Mon Sep 17 00:00:00 2001
From: ntrifunovic <ntrifunovic@a809a056-fc17-0410-9590-b4f493f8b08e>
Date: Sat, 27 Oct 2007 23:34:59 +0000
Subject: [PATCH] Updated the documentation and a test to include peek_next()

git-svn-id: http://svn.code.sf.net/p/utfcpp/code@83 a809a056-fc17-0410-9590-b4f493f8b08e
---
 doc/utf8cpp.html                       | 76 ++++++++++++++++++++++++++
 test_drivers/utf8reader/utf8reader.cpp |  8 ++-
 2 files changed, 82 insertions(+), 2 deletions(-)
diff --git a/doc/utf8cpp.html b/doc/utf8cpp.html
index 63e9afd..4ad7e10 100644
--- a/doc/utf8cpp.html
+++ b/doc/utf8cpp.html
@@ -294,6 +294,46 @@ assert (w == twochars + <span class="literal">3</span>);
     <p>
       This function is typically used to iterate through a UTF-8 encoded string.
     </p>
+    <p>
+      In case of an invalid UTF-8 sequence, a <code>utf8::invalid_utf8</code> exception is
+      thrown.
+    </p>
+    <h4>
+      utf8::peek_next
+    </h4>
+    <p class="version">
+    Available in version 2.1 and later.
+    </p>
+    <p>
+      Given an iterator to the beginning of a UTF-8 sequence, it returns the code
+      point encoded by that sequence without changing the value of the iterator.
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> octet_iterator&gt; 
+uint32_t peek_next(octet_iterator it, octet_iterator end);
+   
+</pre>
+    <p>
+      <code>it</code>: an iterator pointing to the beginning of a UTF-8
+      encoded code point.<br>
+       <code>end</code>: end of the UTF-8 sequence to be processed. If <code>it</code>
+      becomes equal to <code>end</code> during the extraction of a code point, a
+      <code>utf8::not_enough_room</code> exception is thrown.<br>
+       <span class="return_value">Return value</span>: the 32 bit representation of the
+      processed UTF-8 code point.
+    </p>
+    <p>
+      Example of use:
+    </p>
+<pre>
+<span class="keyword">char</span>* twochars = <span class=
+"literal">"\xe6\x97\xa5\xd1\x88"</span>;
+<span class="keyword">char</span>* w = twochars;
+<span class="keyword">int</span> cp = peek_next(w, twochars + <span class="literal">6</span>);
+assert (cp == <span class="literal">0x65e5</span>);
+assert (w == twochars);
+</pre>
     <p>
       In case of an invalid UTF-8 seqence, a <code>utf8::invalid_utf8</code> exception is
       thrown.
@@ -1011,6 +1051,42 @@ assert (w == twochars + <span class="literal">3</span>);
       This is a faster but less safe version of <code>utf8::next</code>. It does not
       check for validity of the supplied UTF-8 sequence.
     </p>
+    <h4>
+      utf8::unchecked::peek_next
+    </h4>
+    <p class="version">
+    Available in version 2.1 and later.
+    </p>
+    <p>
+      Given an iterator to the beginning of a UTF-8 sequence, it returns the code point without changing the value of the iterator.
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> octet_iterator&gt;
+uint32_t peek_next(octet_iterator it);
+   
+</pre>
+    <p>
+      <code>it</code>: an iterator pointing to the beginning of a UTF-8
+      encoded code point.<br>
+       <span class="return_value">Return value</span>: the 32 bit representation of the
+      processed UTF-8 code point.
+    </p>
+    <p>
+      Example of use:
+    </p>
+<pre>
+<span class="keyword">char</span>* twochars = <span class=
+"literal">"\xe6\x97\xa5\xd1\x88"</span>;
+<span class="keyword">char</span>* w = twochars;
+<span class="keyword">int</span> cp = unchecked::peek_next(w);
+assert (cp == <span class="literal">0x65e5</span>);
+assert (w == twochars);
+</pre>
+    <p>
+      This is a faster but less safe version of <code>utf8::peek_next</code>. It does not
+      check for validity of the supplied UTF-8 sequence.
+    </p>
     <h4>
       utf8::unchecked::prior
     </h4>
diff --git a/test_drivers/utf8reader/utf8reader.cpp b/test_drivers/utf8reader/utf8reader.cpp
index ca85286..c88a5ee 100644
--- a/test_drivers/utf8reader/utf8reader.cpp
+++ b/test_drivers/utf8reader/utf8reader.cpp
@@ -59,7 +59,9 @@ int main(int argc, char** argv)
         unsigned char_count = 0;
         string::iterator it = line_start;
         while (it != line_end) {
-            next(it, line_end);
+            unsigned int next_cp = peek_next(it, line_end);
+            if (next(it, line_end) != next_cp)
+                cout << "Line " << line_count << ": Error: peek_next gave a different result than next" << '\n';
             char_count++;
         }
         if (char_count != utf32_line.size())
@@ -121,7 +123,9 @@ int main(int argc, char** argv)
         char_count = 0;
         it = line_start;
         while (it != line_end) {
-            unchecked::next(it);
+            unsigned int next_cp = unchecked::peek_next(it); 
+            if (unchecked::next(it) != next_cp)
+              cout << "Line " << line_count << ": Error: unchecked::peek_next gave a different result than unchecked::next" << '\n';;
             char_count++;
         }
         if (char_count != utf32_line.size())