Compare commits

..

84 commits

Author SHA1 Message Date
ntrifunovic
38a187a7dd Adding the LICENSE file
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@142 a809a056-fc17-0410-9590-b4f493f8b08e
2014-05-15 01:23:53 +00:00
ntrifunovic
cc3c158bf8 Introducing Boost Test for unit-testing v3.
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@141 a809a056-fc17-0410-9590-b4f493f8b08e
2013-03-09 20:51:50 +00:00
ntrifunovic
7075404ff0 First check in for branch 3.x - playing with utf8::append
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@140 a809a056-fc17-0410-9590-b4f493f8b08e
2013-02-24 03:06:50 +00:00
ntrifunovic
fa73898a3d Removing version 1_0 directory
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@139 a809a056-fc17-0410-9590-b4f493f8b08e
2013-02-18 00:05:43 +00:00
ntrifunovic
62b7d7ae0c Release 2.3.4
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@138 a809a056-fc17-0410-9590-b4f493f8b08e
2013-02-17 22:40:46 +00:00
ntrifunovic
596feae4b9 Release 2.3.3
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@137 a809a056-fc17-0410-9590-b4f493f8b08e
2013-02-16 16:30:43 +00:00
ntrifunovic
129a2f4508 Fix for bug ID: 3576827 - replace_invalid() only works with back_inserter
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@136 a809a056-fc17-0410-9590-b4f493f8b08e
2013-02-09 23:33:27 +00:00
ntrifunovic
7767eb67e8 Fixing a potential problem with utf8 to utf16/32 conversions
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@135 a809a056-fc17-0410-9590-b4f493f8b08e
2013-02-09 22:12:53 +00:00
ntrifunovic
d569ff9c55 Fixing a smoke-test warning
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@134 a809a056-fc17-0410-9590-b4f493f8b08e
2013-02-09 21:55:13 +00:00
ntrifunovic
7d589c4210 Fix for bug ID: 3602629 - extra ';' after member function definition
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@133 a809a056-fc17-0410-9590-b4f493f8b08e
2013-02-09 21:50:09 +00:00
ntrifunovic
100dd38c70 Release 2.3.2
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@132 a809a056-fc17-0410-9590-b4f493f8b08e
2012-05-26 23:56:44 +00:00
ntrifunovic
4720a99866 Removing a regression test for a reported bug that I am not fixing.
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@131 a809a056-fc17-0410-9590-b4f493f8b08e
2012-05-26 17:17:43 +00:00
ntrifunovic
adb7687b2f Fix for the bug 3506114: potential crash in replace_invalid
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@130 a809a056-fc17-0410-9590-b4f493f8b08e
2012-05-22 22:55:47 +00:00
ntrifunovic
cd80d5fa9e Changed validate_next to take a reference instead of pointer. Resulted in 5% performance improvement.
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@129 a809a056-fc17-0410-9590-b4f493f8b08e
2011-11-12 17:12:34 +00:00
ntrifunovic
a1eaf5688a Adding a regression test
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@128 a809a056-fc17-0410-9590-b4f493f8b08e
2011-11-06 16:13:52 +00:00
ntrifunovic
e464ef8e86 Fix for the bug ID: 3426789[guidline -> guideline]
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@127 a809a056-fc17-0410-9590-b4f493f8b08e
2011-10-29 22:26:12 +00:00
ntrifunovic
93286b9390 Removed some superfluous code
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@126 a809a056-fc17-0410-9590-b4f493f8b08e
2011-10-16 03:06:05 +00:00
ntrifunovic
7414d0fabf Changed the optimization option from O2 to O3 for the perf test
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@125 a809a056-fc17-0410-9590-b4f493f8b08e
2011-10-16 01:16:51 +00:00
ntrifunovic
26d8c8e424 Refactored internal functions in core.h
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@124 a809a056-fc17-0410-9590-b4f493f8b08e
2011-10-15 22:54:58 +00:00
ntrifunovic
36839ac4e7 Fixed iconv perf test to print out results
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@123 a809a056-fc17-0410-9590-b4f493f8b08e
2011-06-24 23:31:24 +00:00
ntrifunovic
9d7a97089c Fix for the bug [name clash with std::next - ID: 3215839]
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@122 a809a056-fc17-0410-9590-b4f493f8b08e
2011-06-24 23:21:41 +00:00
ntrifunovic
1c3b1a352e Fixed the negative test to reflect the latest changes in detecting invalid utf-8 text
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@121 a809a056-fc17-0410-9590-b4f493f8b08e
2011-02-20 21:02:33 +00:00
ntrifunovic
26b3524f45 Removed redundant regression tests
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@120 a809a056-fc17-0410-9590-b4f493f8b08e
2011-02-20 20:49:41 +00:00
ntrifunovic
5d8b75cd6b Release 2.3.1
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@119 a809a056-fc17-0410-9590-b4f493f8b08e
2011-02-20 18:52:44 +00:00
ntrifunovic
5347b21b56 Fix for ID: 3185087 - utf8::prior and utf8::previous documentation issue
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@118 a809a056-fc17-0410-9590-b4f493f8b08e
2011-02-20 18:33:36 +00:00
ntrifunovic
a4fce3befd Fix for the bug ID: 3083640 - is_code_point_valid incorrectly returns false
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@117 a809a056-fc17-0410-9590-b4f493f8b08e
2011-02-20 18:07:59 +00:00
ntrifunovic
2976b72daa Fix for [3167987]: prior moves it before start
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@116 a809a056-fc17-0410-9590-b4f493f8b08e
2011-02-15 01:18:49 +00:00
ntrifunovic
cc4fe49fdc Minor improvements to performance testing code
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@115 a809a056-fc17-0410-9590-b4f493f8b08e
2010-09-04 16:10:35 +00:00
ntrifunovic
05e6c4ad8d Fix for the bug ID: 3025042: is_bom documentation issue
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@114 a809a056-fc17-0410-9590-b4f493f8b08e
2010-09-04 15:47:12 +00:00
ntrifunovic
14acee1ec5 Release 2.3
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@113 a809a056-fc17-0410-9590-b4f493f8b08e
2010-04-18 00:29:14 +00:00
ntrifunovic
8039bd481b Completed documentation for the exceptions. Fixed bug ID: 2960112: is_bom wording fix
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@112 a809a056-fc17-0410-9590-b4f493f8b08e
2010-04-17 17:09:40 +00:00
ntrifunovic
656f3847e8 Feature request 2857462: Proposed minor extension: safe version of is_bom
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@111 a809a056-fc17-0410-9590-b4f493f8b08e
2009-12-20 22:46:01 +00:00
ntrifunovic
ac756dc9d6 Fix for the bug ID: 2915657 - 64bit portability issue
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@110 a809a056-fc17-0410-9590-b4f493f8b08e
2009-12-20 22:03:47 +00:00
ntrifunovic
0f2c72abf1 Removing the boost directory. Its purpose was to prepare a version of UTF8 CPP for submission to Boost. This plan does not seem feasible.
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@109 a809a056-fc17-0410-9590-b4f493f8b08e
2009-12-20 21:51:15 +00:00
ntrifunovic
59e75aa511 Feature ID 2885695: "Group" utf8 exceptions.
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@108 a809a056-fc17-0410-9590-b4f493f8b08e
2009-12-13 19:39:22 +00:00
ntrifunovic
baf711282e Fix for the bug [ID: 2906315]: < instead != in utf8to32
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@107 a809a056-fc17-0410-9590-b4f493f8b08e
2009-12-07 01:34:23 +00:00
ntrifunovic
301bd94165 Release 2.2.4
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@106 a809a056-fc17-0410-9590-b4f493f8b08e
2009-10-31 16:14:51 +00:00
ntrifunovic
a415a2f081 Fix for the bug ID: 2857454 [dereference invalid iterator]
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@105 a809a056-fc17-0410-9590-b4f493f8b08e
2009-10-29 01:18:27 +00:00
ntrifunovic
d97ccb32f7 Release 2.2.3
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@104 a809a056-fc17-0410-9590-b4f493f8b08e
2009-10-12 23:03:44 +00:00
ntrifunovic
ba4b4c1e83 Fixing regression test id_2857454
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@103 a809a056-fc17-0410-9590-b4f493f8b08e
2009-10-12 22:54:15 +00:00
ntrifunovic
da0c8b96d9 Fix for bug #ID: 2857456[redundant checks in append in checked.h]
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@102 a809a056-fc17-0410-9590-b4f493f8b08e
2009-10-04 18:17:22 +00:00
ntrifunovic
080865eb02 Added regression test for [ 2857454 ] dereference invalid iterator when lead surrogate was last element of the string
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@101 a809a056-fc17-0410-9590-b4f493f8b08e
2009-09-27 18:47:45 +00:00
ntrifunovic
f37a772149 Fix for bug ID: 2852872 [invalid utf16 strings were parsed without any error]
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@100 a809a056-fc17-0410-9590-b4f493f8b08e
2009-09-26 01:41:24 +00:00
ntrifunovic
6c3aa1f33e Added a regression test to detect a sequence of multiple trail surrogate code units
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@99 a809a056-fc17-0410-9590-b4f493f8b08e
2009-09-26 01:13:26 +00:00
ntrifunovic
06cc5cf480 Fix for the bug ID: 2830326: " multiple definition of `utf8::internal::is_overlong_sequence"
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@98 a809a056-fc17-0410-9590-b4f493f8b08e
2009-08-01 01:50:13 +00:00
ntrifunovic
3c9c379857 Release 2.2.1
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@97 a809a056-fc17-0410-9590-b4f493f8b08e
2009-07-28 00:51:51 +00:00
ntrifunovic
6c7224f4f2 Fixing the test drivers to work with GCC 4.3
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@96 a809a056-fc17-0410-9590-b4f493f8b08e
2009-07-28 00:40:12 +00:00
ntrifunovic
169bfe469c Fix for the bug ID: 2823847: warnings from GCC 4.3
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@95 a809a056-fc17-0410-9590-b4f493f8b08e
2009-07-28 00:31:03 +00:00
ntrifunovic
f344a3fb4d Release 2.2
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@94 a809a056-fc17-0410-9590-b4f493f8b08e
2009-07-07 00:47:54 +00:00
ntrifunovic
054defb568 Another update of documentation
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@93 a809a056-fc17-0410-9590-b4f493f8b08e
2009-07-07 00:46:34 +00:00
ntrifunovic
9d935b3c69 Updated documentation to include additional samples. Fixed a typo in core.h
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@92 a809a056-fc17-0410-9590-b4f493f8b08e
2009-07-05 21:14:40 +00:00
ntrifunovic
e2799bdab6 Removed std::distance from validate_next and (hopefully) made it work with input iterators. Also, did a major
refactoring of that function.


git-svn-id: http://svn.code.sf.net/p/utfcpp/code@91 a809a056-fc17-0410-9590-b4f493f8b08e
2009-07-05 00:09:18 +00:00
ntrifunovic
74be521392 Updated the documentation to have a better intro sample
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@90 a809a056-fc17-0410-9590-b4f493f8b08e
2009-07-03 19:40:14 +00:00
ntrifunovic
40a955eef6 Updated the docsample sample
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@89 a809a056-fc17-0410-9590-b4f493f8b08e
2009-07-03 15:28:31 +00:00
ntrifunovic
4df5e1c1ea Fixed the negative test and made the input file name the command line argument
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@88 a809a056-fc17-0410-9590-b4f493f8b08e
2009-07-02 23:56:52 +00:00
ntrifunovic
5748eeff08 Removing bidirectional restrictions for the octet_iterator
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@87 a809a056-fc17-0410-9590-b4f493f8b08e
2009-07-01 11:55:37 +00:00
ntrifunovic
dacd49dde9 Fixed the signature for main in the sample
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@86 a809a056-fc17-0410-9590-b4f493f8b08e
2009-05-17 19:21:31 +00:00
ntrifunovic
76c6662ef9 Implemented feature request ID: 2515238; renamed an internal enum to avoid conflicts with macros from other libraries
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@85 a809a056-fc17-0410-9590-b4f493f8b08e
2009-03-14 17:19:41 +00:00
ntrifunovic
c92c41770d Release 2.1
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@84 a809a056-fc17-0410-9590-b4f493f8b08e
2007-12-16 18:52:45 +00:00
ntrifunovic
7568388d19 Updated the documentation and a test to include peek_next()
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@83 a809a056-fc17-0410-9590-b4f493f8b08e
2007-10-27 23:34:59 +00:00
ntrifunovic
d2081b8381 Added peek_next
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@82 a809a056-fc17-0410-9590-b4f493f8b08e
2007-10-25 22:12:22 +00:00
ntrifunovic
193c1032c2 Deleted the 2_1 branch. Decided against adding the utf-8 string type, at least for now.
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@81 a809a056-fc17-0410-9590-b4f493f8b08e
2007-10-24 23:06:53 +00:00
ntrifunovic
f6668b3189 Version 2.1 branch
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@80 a809a056-fc17-0410-9590-b4f493f8b08e
2007-04-06 13:32:27 +00:00
ntrifunovic
f58bf21527 Release 2.0
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@79 a809a056-fc17-0410-9590-b4f493f8b08e
2007-02-25 00:26:48 +00:00
ntrifunovic
baf63b327a Updated documentation. Fixed a small bug in checked.h. Added new checks to the negative tests
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@78 a809a056-fc17-0410-9590-b4f493f8b08e
2007-02-25 00:16:10 +00:00
ntrifunovic
cd3092c0ca A minor documentation fix
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@77 a809a056-fc17-0410-9590-b4f493f8b08e
2006-12-18 02:20:56 +00:00
ntrifunovic
b4f5578f4d Updated buildrelease.pl for the 2.0 source code structure
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@76 a809a056-fc17-0410-9590-b4f493f8b08e
2006-12-18 02:18:30 +00:00
ntrifunovic
fe0be22e75 Release 2.0 Beta 1
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@75 a809a056-fc17-0410-9590-b4f493f8b08e
2006-12-18 01:52:36 +00:00
ntrifunovic
3df044a663 Added documentation for the iterator adapter
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@74 a809a056-fc17-0410-9590-b4f493f8b08e
2006-12-18 01:52:13 +00:00
ntrifunovic
83b6f918a9 Updated makefiles to reflect the new source structure
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@73 a809a056-fc17-0410-9590-b4f493f8b08e
2006-12-18 01:50:44 +00:00
ntrifunovic
e022e54c64 The requirement for octet_iterator is bidirectional rather than random access now. Other minor changes
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@72 a809a056-fc17-0410-9590-b4f493f8b08e
2006-12-18 01:49:58 +00:00
ntrifunovic
77c267b49e Added boost directory, updated documentation, fixed a typo in a local variable in the code
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@71 a809a056-fc17-0410-9590-b4f493f8b08e
2006-12-07 02:34:42 +00:00
ntrifunovic
6f08efdc90 Added unchecked::previous to the 1.x branch
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@70 a809a056-fc17-0410-9590-b4f493f8b08e
2006-11-23 18:11:24 +00:00
ntrifunovic
fb13348356 Added the unchecked iterator, added base() to the checked one, updated tests
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@69 a809a056-fc17-0410-9590-b4f493f8b08e
2006-11-23 18:10:26 +00:00
ntrifunovic
8da1b779ac Release notes for v1.02
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@68 a809a056-fc17-0410-9590-b4f493f8b08e
2006-11-20 20:05:44 +00:00
ntrifunovic
c7fd119bec deprecated previous and introduced prior instead
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@67 a809a056-fc17-0410-9590-b4f493f8b08e
2006-11-19 01:15:37 +00:00
ntrifunovic
e4dc80dae3 Added the checked iterator, function prior that replaces previous, and updated the html documentation
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@66 a809a056-fc17-0410-9590-b4f493f8b08e
2006-11-04 01:28:38 +00:00
ntrifunovic
d2ee7164b6 Added the first version of the iterator to the code. Started upgrading the html documentation
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@65 a809a056-fc17-0410-9590-b4f493f8b08e
2006-10-28 16:25:52 +00:00
ntrifunovic
24f4090afa Release 1.01
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@64 a809a056-fc17-0410-9590-b4f493f8b08e
2006-10-24 12:51:05 +00:00
ntrifunovic
f90dc28c5b Fix for the bug 1583547: exception::what is a const member function
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@63 a809a056-fc17-0410-9590-b4f493f8b08e
2006-10-24 12:41:15 +00:00
ntrifunovic
70bf3379df split the library into multiple .h files
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@62 a809a056-fc17-0410-9590-b4f493f8b08e
2006-10-20 22:37:59 +00:00
ntrifunovic
f0fce39119 Implemented replace_invalid functionality
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@61 a809a056-fc17-0410-9590-b4f493f8b08e
2006-10-07 21:25:47 +00:00
ntrifunovic
8af502d493 Version 2.x of the library
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@60 a809a056-fc17-0410-9590-b4f493f8b08e
2006-09-29 19:28:38 +00:00
ntrifunovic
9d706078c8 Moved the current tree under the new v1_0 directory. For the new development, I'll create new branches.
git-svn-id: http://svn.code.sf.net/p/utfcpp/code@59 a809a056-fc17-0410-9590-b4f493f8b08e
2006-09-15 20:07:05 +00:00
38 changed files with 220 additions and 119 deletions

View file

@ -1,68 +0,0 @@
// Copyright 2006 Nemanja Trifunovic
/*
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#ifndef id71B1E0983F3D4F7BAD0C091C4569AB37
#define id71B1E0983F3D4F7BAD0C091C4569AB37
#include <stdexcept>
namespace utf8 {

// Common root of every exception type defined by the library, so that a
// single handler can catch all of them.
class exception : public ::std::exception {
};

// The concrete exception types follow. Each one carries the value it was
// constructed with (where applicable) and a fixed what() message.

// Reports an invalid code point; keeps the 32-bit value passed to the constructor.
class invalid_code_point : public exception {
public:
    invalid_code_point(uint32_t cp) : value_(cp) {}

    virtual const char* what() const throw()
    {
        return "Invalid code point";
    }

    uint32_t code_point() const
    {
        return value_;
    }

private:
    uint32_t value_;
};

// Reports invalid UTF-8; keeps the octet passed to the constructor.
class invalid_utf8 : public exception {
public:
    invalid_utf8 (uint8_t u) : octet_(u) {}

    virtual const char* what() const throw()
    {
        return "Invalid UTF-8";
    }

    uint8_t utf8_octet() const
    {
        return octet_;
    }

private:
    uint8_t octet_;
};

// Reports invalid UTF-16; keeps the 16-bit word passed to the constructor.
class invalid_utf16 : public exception {
public:
    invalid_utf16 (uint16_t u) : word_(u) {}

    virtual const char* what() const throw()
    {
        return "Invalid UTF-16";
    }

    uint16_t utf16_word() const
    {
        return word_;
    }

private:
    uint16_t word_;
};

// Reports that there was not enough space; carries no payload.
class not_enough_room : public exception {
public:
    virtual const char* what() const throw()
    {
        return "Not enough space";
    }
};

} //namespace utf8
#endif

5
v2_0/samples/Makefile Normal file
View file

@ -0,0 +1,5 @@
# Builds the documentation sample program.
# CC is the compiler driver (g++); CFLAGS enables debug info (-g), the common
# warning set (-Wall) and strict ISO conformance diagnostics (-pedantic).
CC = g++
CFLAGS = -g -Wall -pedantic
# docsample is rebuilt when its source file or the library header changes.
# NOTE(review): make requires the recipe line below to begin with a TAB; the
# leading whitespace appears to have been lost in this view - confirm in the real file.
docsample: docsample.cpp ../source/utf8.h
$(CC) $(CFLAGS) docsample.cpp -odocsample

View file

@ -29,12 +29,44 @@ DEALINGS IN THE SOFTWARE.
#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
#include "core.h"
#include "exception.h"
#include <cassert>
#include <cstddef>
#include <stdexcept>
namespace utf8
{
// Root type for everything the library throws; derives from std::exception
// and adds nothing of its own.
class exception : public ::std::exception {
};

// Concrete exception types. Each stores the value handed to its constructor
// (if any) and returns a fixed message from what().

// Invalid code point: remembers the 32-bit value it was built from.
class invalid_code_point : public exception {
    uint32_t stored_cp;
public:
    invalid_code_point(uint32_t cp)
        : stored_cp(cp)
    {}
    virtual const char* what() const throw()
    {
        return "Invalid code point";
    }
    uint32_t code_point() const
    {
        return stored_cp;
    }
};

// Invalid UTF-8: remembers the octet it was built from.
class invalid_utf8 : public exception {
    uint8_t stored_octet;
public:
    invalid_utf8 (uint8_t u)
        : stored_octet(u)
    {}
    virtual const char* what() const throw()
    {
        return "Invalid UTF-8";
    }
    uint8_t utf8_octet() const
    {
        return stored_octet;
    }
};

// Invalid UTF-16: remembers the 16-bit word it was built from.
class invalid_utf16 : public exception {
    uint16_t stored_word;
public:
    invalid_utf16 (uint16_t u)
        : stored_word(u)
    {}
    virtual const char* what() const throw()
    {
        return "Invalid UTF-16";
    }
    uint16_t utf16_word() const
    {
        return stored_word;
    }
};

// Not enough space: no payload, message only.
class not_enough_room : public exception {
public:
    virtual const char* what() const throw()
    {
        return "Not enough space";
    }
};
/// The library API - functions intended to be called by the users
template <typename octet_iterator>
@ -231,53 +263,9 @@ namespace utf8
return result;
}
// Error policies for the iterator class
// Error policy that signals violations by throwing standard exceptions.
template <typename I>
class ErrorPolicyThrow {
public:
    // Throws std::out_of_range when `it` lies before range_start or past range_end.
    static void check_in_range(const I& it, const I& range_start, const I& range_end)
    {
        const bool before_start = it < range_start;
        if (before_start || it > range_end) {
            throw std::out_of_range("Invalid utf-8 iterator position");
        }
    }

    // Throws std::logic_error when the two (start, end) pairs are not the same range.
    static void check_same_range(const I& range_start_a, const I& range_start_b, const I& range_end_a, const I& range_end_b)
    {
        const bool starts_differ = range_start_a != range_start_b;
        if (starts_differ || range_end_a != range_end_b) {
            throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
        }
    }
};
// Error policy that checks its conditions with assert() instead of throwing.
// When NDEBUG is defined assert() expands to nothing, so the checks vanish.
template <typename I>
class ErrorPolicyAssert {
public:
    // Asserts that range_start <= it <= range_end.
    static void check_in_range(const I& it, const I& range_start, const I& range_end)
    {
        // Mark the parameters as used so NDEBUG builds do not warn about them.
        (void)it;
        (void)range_start;
        (void)range_end;
        assert(it >= range_start && it <= range_end);
    }

    // Asserts that both (start, end) pairs describe the same range.
    static void check_same_range(const I& range_start_a, const I& range_start_b, const I& range_end_a, const I& range_end_b)
    {
        // Mark the parameters as used so NDEBUG builds do not warn about them.
        (void)range_start_a;
        (void)range_start_b;
        (void)range_end_a;
        (void)range_end_b;
        assert(range_start_a == range_start_b && range_end_a == range_end_b);
    }
};
// The iterator class
template <
typename octet_iterator,
typename error_policy=ErrorPolicyThrow<octet_iterator>
>
class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t, std::ptrdiff_t, uint32_t*, uint32_t> {
template <typename octet_iterator>
class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
octet_iterator it;
octet_iterator range_start;
octet_iterator range_end;
@ -288,7 +276,8 @@ namespace utf8
const octet_iterator& range_end) :
it(octet_it), range_start(range_start), range_end(range_end)
{
error_policy::check_in_range(it, range_start, range_end);
if (it < range_start || it > range_end)
throw std::out_of_range("Invalid utf-8 iterator position");
}
// the default "big three" are OK
octet_iterator base () const { return it; }
@ -299,7 +288,8 @@ namespace utf8
}
bool operator == (const iterator& rhs) const
{
error_policy::check_same_range(range_start, rhs.range_start, range_end, rhs.range_end);
if (range_start != rhs.range_start || range_end != rhs.range_end)
throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
return (it == rhs.it);
}
bool operator != (const iterator& rhs) const
@ -333,3 +323,5 @@ namespace utf8
} // namespace utf8
#endif //header guard

147
v3_0/src/utf8.h Normal file
View file

@ -0,0 +1,147 @@
// Copyright 2006-2013 Nemanja Trifunovic
/*
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/
#ifndef UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
#define UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
// By default, utf8 cpp requires C++ Standard Library strings and exceptions
// The following macros can be used to change the default behavior
// #define UTF_CPP_NO_STD_STRING
// #define UTF_CPP_NO_EXCEPTIONS
#ifndef UTF_CPP_NO_EXCEPTIONS
#include <stdexcept>
#ifndef UTF_CPP_NO_STD_STRING
#include <string>
#include <iterator>
#endif // #ifndef UTF_CPP_NO_STD_STRING
#endif // #ifndef UTF_CPP_NO_EXCEPTIONS
namespace utf8
{
    /// Error codes - used internally, and reported through the `error`
    /// out-parameter of the non-throwing overloads.
    enum class utf_error {UTF8_OK, NOT_ENOUGH_ROOM, INVALID_LEAD,
                          INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT};

#ifndef UTF_CPP_NO_EXCEPTIONS
    /// Base for the exceptions that may be thrown from the library
    class exception : public ::std::exception {
    };

    /// Thrown by the throwing append() overloads when the value to encode is
    /// rejected; stores that value so the handler can inspect it.
    class invalid_code_point : public exception {
        char32_t cp;
    public:
        invalid_code_point(char32_t cp) : cp(cp) {}
        virtual const char* what() const noexcept override { return "Invalid code point"; }
        /// The value this exception was constructed with.
        char32_t code_point() const {return cp;}
    };
#endif // #ifndef UTF_CPP_NO_EXCEPTIONS

    /// Helper code - not intended to be directly called by the library users. May be changed at any time
    namespace internal
    {
        // Unicode constants
        // Leading (high) surrogates: 0xd800 - 0xdbff
        // Trailing (low) surrogates: 0xdc00 - 0xdfff
        const char32_t LEAD_SURROGATE_MIN  = 0x0000d800;
        const char32_t LEAD_SURROGATE_MAX  = 0x0000dbff;
        const char32_t TRAIL_SURROGATE_MIN = 0x0000dc00;
        const char32_t TRAIL_SURROGATE_MAX = 0x0000dfff;

        // Maximum valid value for a Unicode code point
        const char32_t CODE_POINT_MAX      = 0x0010ffff;

        /// True when cp lies anywhere in the surrogate block (lead or trail).
        inline bool is_surrogate(char32_t cp)
        {
            return (cp >= LEAD_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
        }

        /// True when cp is at most CODE_POINT_MAX and is not a surrogate.
        inline bool is_code_point_valid(char32_t cp)
        {
            return (cp <= CODE_POINT_MAX && !utf8::internal::is_surrogate(cp));
        }
    } // namespace internal

    /// The library API - functions intended to be called by the users

    /// Encodes cp as UTF-8 and writes the octets through result (non-throwing).
    ///
    /// @param cp     code point to encode
    /// @param result output iterator that receives one to four octets
    /// @param error  set to utf_error::INVALID_CODE_POINT when cp is rejected;
    ///               left untouched on success, so initialise it to
    ///               utf_error::UTF8_OK before the call
    /// @return the iterator advanced past the octets written, or result
    ///         unchanged when cp is rejected (nothing is written in that case)
    template <typename octet_iterator>
    octet_iterator append(char32_t cp, octet_iterator result, utf_error& error)
    {
        if (!utf8::internal::is_code_point_valid(cp)) {
            error = utf8::utf_error::INVALID_CODE_POINT;
            return result;
        }

        if (cp < 0x80)                        // one octet
            *(result++) = static_cast<char>(cp);
        else if (cp < 0x800) {                // two octets
            *(result++) = static_cast<char>((cp >> 6)            | 0xc0);
            *(result++) = static_cast<char>((cp & 0x3f)          | 0x80);
        }
        else if (cp < 0x10000) {              // three octets
            *(result++) = static_cast<char>((cp >> 12)           | 0xe0);
            *(result++) = static_cast<char>(((cp >> 6) & 0x3f)   | 0x80);
            *(result++) = static_cast<char>((cp & 0x3f)          | 0x80);
        }
        else {                                // four octets
            *(result++) = static_cast<char>((cp >> 18)           | 0xf0);
            *(result++) = static_cast<char>(((cp >> 12) & 0x3f)  | 0x80);
            *(result++) = static_cast<char>(((cp >> 6) & 0x3f)   | 0x80);
            *(result++) = static_cast<char>((cp & 0x3f)          | 0x80);
        }
        return result;
    }

#ifndef UTF_CPP_NO_EXCEPTIONS
    /// Encodes cp as UTF-8 and writes the octets through result (throwing).
    ///
    /// @param cp     code point to encode
    /// @param result output iterator that receives one to four octets
    /// @return the iterator advanced past the octets written
    /// @throws utf8::invalid_code_point when cp is rejected; nothing is written
    template <typename octet_iterator>
    octet_iterator append(char32_t cp, octet_iterator result)
    {
        utf8::utf_error err {utf8::utf_error::UTF8_OK};
        // Keep the advanced iterator returned by the worker. Dropping it only
        // goes unnoticed with iterators whose increment is a no-op (such as
        // std::back_insert_iterator); a raw pointer would come back unmoved.
        result = utf8::append(cp, result, err);
        if (err != utf8::utf_error::UTF8_OK)
            throw utf8::invalid_code_point(cp);

        return result;
    }

#ifndef UTF_CPP_NO_STD_STRING
    /// Appends the UTF-8 encoding of cp to the end of str.
    ///
    /// @throws utf8::invalid_code_point when cp is rejected; str is not modified
    inline void append(char32_t cp, std::string& str)
    {
        utf8::append(cp, std::back_inserter(str));
    }
#endif // #ifndef UTF_CPP_NO_STD_STRING
#endif // #ifndef UTF_CPP_NO_EXCEPTIONS
} // namespace utf8
#endif // header guard

6
v3_0/tests/Makefile Normal file
View file

@ -0,0 +1,6 @@
# Builds and runs the unit tests.
# CC is the compiler driver (g++); CFLAGS enables debug info (-g), the common
# warning set (-Wall) and selects the C++11 language standard.
CC = g++
CFLAGS = -g -Wall --std=c++11
# smoketest compiles unit.cpp into ./unit, links it against the Boost unit
# test framework library, then executes the resulting binary.
# The recipe produces a file named "unit", not "smoketest", so make finds no
# up-to-date target file and runs the recipe on every invocation.
# NOTE(review): make requires recipe lines to begin with a TAB; the leading
# whitespace appears to have been lost in this view - confirm in the real file.
smoketest: unit.cpp ../src/utf8.h
$(CC) $(CFLAGS) unit.cpp -ounit -lboost_unit_test_framework
./unit

19
v3_0/tests/unit.cpp Normal file
View file

@ -0,0 +1,19 @@
#define BOOST_TEST_DYN_LINK
#define BOOST_TEST_MODULE UTF8_CPP_UNIT
#include <boost/test/unit_test.hpp>
#include "../src/utf8.h"
using namespace std;
// Checks utf8::append(char32_t, std::string&) for a two-octet and a
// three-octet code point by comparing the exact bytes appended to the string.
BOOST_AUTO_TEST_CASE(append)
{
string s;
// U+0448 must encode to the two octets D1 88 without throwing.
BOOST_CHECK_NO_THROW (utf8::append(U'\U00000448', s));
BOOST_CHECK (s.length() == 2 && s[0] == '\xd1' && s[1] == '\x88');
// Start from an empty string again so the next length check is exact.
s.erase();
// U+65E5 must encode to the three octets E6 97 A5 without throwing.
BOOST_CHECK_NO_THROW(utf8::append(U'\U000065e5', s));
BOOST_CHECK (s.length() == 3 && s[0] == '\xe6' && s[1] == '\x97' && s[2] == '\xa5');
}