Updated the documentation and a test to include peek_next()
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@83 a809a056-fc17-0410-9590-b4f493f8b08e
This commit is contained in:
parent
3a04fda526
commit
4d7ad9b625
2 changed files with 82 additions and 2 deletions
|
@ -294,6 +294,46 @@ assert (w == twochars + <span class="literal">3</span>);
|
|||
<p>
|
||||
This function is typically used to iterate through a UTF-8 encoded string.
|
||||
</p>
|
||||
<p>
|
||||
In case of an invalid UTF-8 sequence, a <code>utf8::invalid_utf8</code> exception is
|
||||
thrown.
|
||||
</p>
|
||||
<h4>
|
||||
utf8::peek_next
|
||||
</h4>
|
||||
<p class="version">
|
||||
Available in version 2.1 and later.
|
||||
</p>
|
||||
<p>
|
||||
Given the iterator to the beginning of the UTF-8 sequence, it returns the code
|
||||
point for the following sequence without changing the value of the iterator.
|
||||
</p>
|
||||
<pre>
|
||||
<span class="keyword">template</span> <<span class=
|
||||
"keyword">typename</span> octet_iterator>
|
||||
uint32_t peek_next(octet_iterator it, octet_iterator end);
|
||||
|
||||
</pre>
|
||||
<p>
|
||||
<code>it</code>: an iterator pointing to the beginning of a UTF-8
|
||||
encoded code point.<br>
|
||||
<code>end</code>: end of the UTF-8 sequence to be processed. If <code>it</code>
|
||||
becomes equal to <code>end</code> during the extraction of a code point, an
|
||||
<code>utf8::not_enough_room</code> exception is thrown.<br>
|
||||
<span class="return_value">Return value</span>: the 32 bit representation of the
|
||||
processed UTF-8 code point.
|
||||
</p>
|
||||
<p>
|
||||
Example of use:
|
||||
</p>
|
||||
<pre>
|
||||
<span class="keyword">char</span>* twochars = <span class=
|
||||
"literal">"\xe6\x97\xa5\xd1\x88"</span>;
|
||||
<span class="keyword">char</span>* w = twochars;
|
||||
<span class="keyword">int</span> cp = peek_next(w, twochars + <span class="literal">6</span>);
|
||||
assert (cp == <span class="literal">0x65e5</span>);
|
||||
assert (w == twochars);
|
||||
</pre>
|
||||
<p>
|
||||
In case of an invalid UTF-8 sequence, a <code>utf8::invalid_utf8</code> exception is
|
||||
thrown.
|
||||
|
@ -1011,6 +1051,42 @@ assert (w == twochars + <span class="literal">3</span>);
|
|||
This is a faster but less safe version of <code>utf8::next</code>. It does not
|
||||
check for validity of the supplied UTF-8 sequence.
|
||||
</p>
|
||||
<h4>
|
||||
utf8::unchecked::peek_next
|
||||
</h4>
|
||||
<p class="version">
|
||||
Available in version 2.1 and later.
|
||||
</p>
|
||||
<p>
|
||||
Given the iterator to the beginning of a UTF-8 sequence, it returns the code point for the following sequence without changing the value of the iterator.
|
||||
</p>
|
||||
<pre>
|
||||
<span class="keyword">template</span> <<span class=
|
||||
"keyword">typename</span> octet_iterator>
|
||||
uint32_t peek_next(octet_iterator it);
|
||||
|
||||
</pre>
|
||||
<p>
|
||||
<code>it</code>: an iterator pointing to the beginning of a UTF-8
|
||||
encoded code point.<br>
|
||||
<span class="return_value">Return value</span>: the 32 bit representation of the
|
||||
processed UTF-8 code point.
|
||||
</p>
|
||||
<p>
|
||||
Example of use:
|
||||
</p>
|
||||
<pre>
|
||||
<span class="keyword">char</span>* twochars = <span class=
|
||||
"literal">"\xe6\x97\xa5\xd1\x88"</span>;
|
||||
<span class="keyword">char</span>* w = twochars;
|
||||
<span class="keyword">int</span> cp = unchecked::peek_next(w);
|
||||
assert (cp == <span class="literal">0x65e5</span>);
|
||||
assert (w == twochars);
|
||||
</pre>
|
||||
<p>
|
||||
This is a faster but less safe version of <code>utf8::peek_next</code>. It does not
|
||||
check for validity of the supplied UTF-8 sequence.
|
||||
</p>
|
||||
<h4>
|
||||
utf8::unchecked::prior
|
||||
</h4>
|
||||
|
|
|
@ -59,7 +59,9 @@ int main(int argc, char** argv)
|
|||
unsigned char_count = 0;
|
||||
string::iterator it = line_start;
|
||||
while (it != line_end) {
|
||||
next(it, line_end);
|
||||
unsigned int next_cp = peek_next(it, line_end);
|
||||
if (next(it, line_end) != next_cp)
|
||||
cout << "Line " << line_count << ": Error: peek_next gave a different result than next" << '\n';
|
||||
char_count++;
|
||||
}
|
||||
if (char_count != utf32_line.size())
|
||||
|
@ -121,7 +123,9 @@ int main(int argc, char** argv)
|
|||
char_count = 0;
|
||||
it = line_start;
|
||||
while (it != line_end) {
|
||||
unchecked::next(it);
|
||||
unsigned int next_cp = unchecked::peek_next(it);
|
||||
if (unchecked::next(it) != next_cp)
|
||||
cout << "Line " << line_count << ": Error: unchecked::peek_next gave a different result than unchecked::next" << '\n';;
|
||||
char_count++;
|
||||
}
|
||||
if (char_count != utf32_line.size())
|
||||
|
|
Loading…
Add table
Reference in a new issue