Updated the documentation and a test to include peek_next()
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@83 a809a056-fc17-0410-9590-b4f493f8b08e
This commit is contained in:
parent
3a04fda526
commit
4d7ad9b625
2 changed files with 82 additions and 2 deletions
|
@ -294,6 +294,46 @@ assert (w == twochars + <span class="literal">3</span>);
|
||||||
<p>
|
<p>
|
||||||
This function is typically used to iterate through a UTF-8 encoded string.
|
This function is typically used to iterate through a UTF-8 encoded string.
|
||||||
</p>
|
</p>
|
||||||
|
<p>
|
||||||
|
In case of an invalid UTF-8 sequence, a <code>utf8::invalid_utf8</code> exception is
|
||||||
|
thrown.
|
||||||
|
</p>
|
||||||
|
<h4>
|
||||||
|
utf8::peek_next
|
||||||
|
</h4>
|
||||||
|
<p class="version">
|
||||||
|
Available in version 2.1 and later.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
Given the iterator to the beginning of the UTF-8 sequence, it returns the code
|
||||||
|
point for the following sequence without changing the value of the iterator.
|
||||||
|
</p>
|
||||||
|
<pre>
|
||||||
|
<span class="keyword">template</span> <<span class=
|
||||||
|
"keyword">typename</span> octet_iterator>
|
||||||
|
uint32_t peek_next(octet_iterator it, octet_iterator end);
|
||||||
|
|
||||||
|
</pre>
|
||||||
|
<p>
|
||||||
|
<code>it</code>: an iterator pointing to the beginning of a UTF-8
|
||||||
|
encoded code point.<br>
|
||||||
|
<code>end</code>: end of the UTF-8 sequence to be processed. If <code>it</code>
|
||||||
|
gets equal to <code>end</code> during the extraction of a code point, an
|
||||||
|
<code>utf8::not_enough_room</code> exception is thrown.<br>
|
||||||
|
<span class="return_value">Return value</span>: the 32 bit representation of the
|
||||||
|
processed UTF-8 code point.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
Example of use:
|
||||||
|
</p>
|
||||||
|
<pre>
|
||||||
|
<span class="keyword">char</span>* twochars = <span class=
|
||||||
|
"literal">"\xe6\x97\xa5\xd1\x88"</span>;
|
||||||
|
<span class="keyword">char</span>* w = twochars;
|
||||||
|
<span class="keyword">int</span> cp = peek_next(w, twochars + <span class="literal">6</span>);
|
||||||
|
assert (cp == <span class="literal">0x65e5</span>);
|
||||||
|
assert (w == twochars);
|
||||||
|
</pre>
|
||||||
<p>
|
<p>
|
||||||
In case of an invalid UTF-8 sequence, a <code>utf8::invalid_utf8</code> exception is
|
In case of an invalid UTF-8 sequence, a <code>utf8::invalid_utf8</code> exception is
|
||||||
thrown.
|
thrown.
|
||||||
|
@ -1011,6 +1051,42 @@ assert (w == twochars + <span class="literal">3</span>);
|
||||||
This is a faster but less safe version of <code>utf8::next</code>. It does not
|
This is a faster but less safe version of <code>utf8::next</code>. It does not
|
||||||
check for validity of the supplied UTF-8 sequence.
|
check for validity of the supplied UTF-8 sequence.
|
||||||
</p>
|
</p>
|
||||||
|
<h4>
|
||||||
|
utf8::unchecked::peek_next
|
||||||
|
</h4>
|
||||||
|
<p class="version">
|
||||||
|
Available in version 2.1 and later.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
Given the iterator to the beginning of a UTF-8 sequence, it returns the code point for the following sequence without changing the value of the iterator.
|
||||||
|
</p>
|
||||||
|
<pre>
|
||||||
|
<span class="keyword">template</span> <<span class=
|
||||||
|
"keyword">typename</span> octet_iterator>
|
||||||
|
uint32_t peek_next(octet_iterator it);
|
||||||
|
|
||||||
|
</pre>
|
||||||
|
<p>
|
||||||
|
<code>it</code>: an iterator pointing to the beginning of a UTF-8
|
||||||
|
encoded code point.<br>
|
||||||
|
<span class="return_value">Return value</span>: the 32 bit representation of the
|
||||||
|
processed UTF-8 code point.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
Example of use:
|
||||||
|
</p>
|
||||||
|
<pre>
|
||||||
|
<span class="keyword">char</span>* twochars = <span class=
|
||||||
|
"literal">"\xe6\x97\xa5\xd1\x88"</span>;
|
||||||
|
<span class="keyword">char</span>* w = twochars;
|
||||||
|
<span class="keyword">int</span> cp = unchecked::peek_next(w);
|
||||||
|
assert (cp == <span class="literal">0x65e5</span>);
|
||||||
|
assert (w == twochars);
|
||||||
|
</pre>
|
||||||
|
<p>
|
||||||
|
This is a faster but less safe version of <code>utf8::peek_next</code>. It does not
|
||||||
|
check for validity of the supplied UTF-8 sequence.
|
||||||
|
</p>
|
||||||
<h4>
|
<h4>
|
||||||
utf8::unchecked::prior
|
utf8::unchecked::prior
|
||||||
</h4>
|
</h4>
|
||||||
|
|
|
@ -59,7 +59,9 @@ int main(int argc, char** argv)
|
||||||
unsigned char_count = 0;
|
unsigned char_count = 0;
|
||||||
string::iterator it = line_start;
|
string::iterator it = line_start;
|
||||||
while (it != line_end) {
|
while (it != line_end) {
|
||||||
next(it, line_end);
|
unsigned int next_cp = peek_next(it, line_end);
|
||||||
|
if (next(it, line_end) != next_cp)
|
||||||
|
cout << "Line " << line_count << ": Error: peek_next gave a different result than next" << '\n';
|
||||||
char_count++;
|
char_count++;
|
||||||
}
|
}
|
||||||
if (char_count != utf32_line.size())
|
if (char_count != utf32_line.size())
|
||||||
|
@ -121,7 +123,9 @@ int main(int argc, char** argv)
|
||||||
char_count = 0;
|
char_count = 0;
|
||||||
it = line_start;
|
it = line_start;
|
||||||
while (it != line_end) {
|
while (it != line_end) {
|
||||||
unchecked::next(it);
|
unsigned int next_cp = unchecked::peek_next(it);
|
||||||
|
if (unchecked::next(it) != next_cp)
|
||||||
|
cout << "Line " << line_count << ": Error: unchecked::peek_next gave a different result than unchecked::next" << '\n';;
|
||||||
char_count++;
|
char_count++;
|
||||||
}
|
}
|
||||||
if (char_count != utf32_line.size())
|
if (char_count != utf32_line.size())
|
||||||
|
|
Loading…
Add table
Reference in a new issue