Updated the documentation and a test to include peek_next()

git-svn-id: http://svn.code.sf.net/p/utfcpp/code@83 a809a056-fc17-0410-9590-b4f493f8b08e
This commit is contained in:
ntrifunovic 2007-10-27 23:34:59 +00:00 committed by King_DuckZ
parent 3a04fda526
commit 4d7ad9b625
2 changed files with 82 additions and 2 deletions

View file

@ -294,6 +294,46 @@ assert (w == twochars + <span class="literal">3</span>);
<p>
This function is typically used to iterate through a UTF-8 encoded string.
</p>
<p>
In case of an invalid UTF-8 sequence, a <code>utf8::invalid_utf8</code> exception is
thrown.
</p>
<h4>
utf8::peek_next
</h4>
<p class="version">
Available in version 2.1 and later.
</p>
<p>
Given the iterator to the beginning of the UTF-8 sequence, it returns the code
point for the following sequence without changing the value of the iterator.
</p>
<pre>
<span class="keyword">template</span> &lt;<span class=
"keyword">typename</span> octet_iterator&gt;
uint32_t peek_next(octet_iterator it, octet_iterator end);
</pre>
<p>
<code>it</code>: an iterator pointing to the beginning of a UTF-8
encoded code point.<br>
<code>end</code>: end of the UTF-8 sequence to be processed. If <code>it</code>
becomes equal to <code>end</code> during the extraction of a code point, a
<code>utf8::not_enough_room</code> exception is thrown.<br>
<span class="return_value">Return value</span>: the 32 bit representation of the
processed UTF-8 code point.
</p>
<p>
Example of use:
</p>
<pre>
<span class="keyword">char</span>* twochars = <span class=
"literal">"\xe6\x97\xa5\xd1\x88"</span>;
<span class="keyword">char</span>* w = twochars;
<span class="keyword">int</span> cp = peek_next(w, twochars + <span class="literal">6</span>);
assert (cp == <span class="literal">0x65e5</span>);
assert (w == twochars);
</pre>
<p>
In case of an invalid UTF-8 sequence, a <code>utf8::invalid_utf8</code> exception is
thrown.
@ -1011,6 +1051,42 @@ assert (w == twochars + <span class="literal">3</span>);
This is a faster but less safe version of <code>utf8::next</code>. It does not
check for validity of the supplied UTF-8 sequence.
</p>
<h4>
utf8::unchecked::peek_next
</h4>
<p class="version">
Available in version 2.1 and later.
</p>
<p>
Given the iterator to the beginning of a UTF-8 sequence, it returns the code point
for the following sequence without changing the value of the iterator.
</p>
<pre>
<span class="keyword">template</span> &lt;<span class=
"keyword">typename</span> octet_iterator&gt;
uint32_t peek_next(octet_iterator it);
</pre>
<p>
<code>it</code>: an iterator pointing to the beginning of a UTF-8
encoded code point.<br>
<span class="return_value">Return value</span>: the 32 bit representation of the
processed UTF-8 code point.
</p>
<p>
Example of use:
</p>
<pre>
<span class="keyword">char</span>* twochars = <span class=
"literal">"\xe6\x97\xa5\xd1\x88"</span>;
<span class="keyword">char</span>* w = twochars;
<span class="keyword">int</span> cp = unchecked::peek_next(w);
assert (cp == <span class="literal">0x65e5</span>);
assert (w == twochars);
</pre>
<p>
This is a faster but less safe version of <code>utf8::peek_next</code>. It does not
check for validity of the supplied UTF-8 sequence.
</p>
<h4>
utf8::unchecked::prior
</h4>

View file

@ -59,7 +59,9 @@ int main(int argc, char** argv)
unsigned char_count = 0;
string::iterator it = line_start;
while (it != line_end) {
next(it, line_end);
unsigned int next_cp = peek_next(it, line_end);
if (next(it, line_end) != next_cp)
cout << "Line " << line_count << ": Error: peek_next gave a different result than next" << '\n';
char_count++;
}
if (char_count != utf32_line.size())
@ -121,7 +123,9 @@ int main(int argc, char** argv)
char_count = 0;
it = line_start;
while (it != line_end) {
unchecked::next(it);
unsigned int next_cp = unchecked::peek_next(it);
if (unchecked::next(it) != next_cp)
cout << "Line " << line_count << ": Error: unchecked::peek_next gave a different result than unchecked::next" << '\n';;
char_count++;
}
if (char_count != utf32_line.size())