Compare commits
84 commits
Author | SHA1 | Date | |
---|---|---|---|
|
38a187a7dd | ||
|
cc3c158bf8 | ||
|
7075404ff0 | ||
|
fa73898a3d | ||
|
62b7d7ae0c | ||
|
596feae4b9 | ||
|
129a2f4508 | ||
|
7767eb67e8 | ||
|
d569ff9c55 | ||
|
7d589c4210 | ||
|
100dd38c70 | ||
|
4720a99866 | ||
|
adb7687b2f | ||
|
cd80d5fa9e | ||
|
a1eaf5688a | ||
|
e464ef8e86 | ||
|
93286b9390 | ||
|
7414d0fabf | ||
|
26d8c8e424 | ||
|
36839ac4e7 | ||
|
9d7a97089c | ||
|
1c3b1a352e | ||
|
26b3524f45 | ||
|
5d8b75cd6b | ||
|
5347b21b56 | ||
|
a4fce3befd | ||
|
2976b72daa | ||
|
cc4fe49fdc | ||
|
05e6c4ad8d | ||
|
14acee1ec5 | ||
|
8039bd481b | ||
|
656f3847e8 | ||
|
ac756dc9d6 | ||
|
0f2c72abf1 | ||
|
59e75aa511 | ||
|
baf711282e | ||
|
301bd94165 | ||
|
a415a2f081 | ||
|
d97ccb32f7 | ||
|
ba4b4c1e83 | ||
|
da0c8b96d9 | ||
|
080865eb02 | ||
|
f37a772149 | ||
|
6c3aa1f33e | ||
|
06cc5cf480 | ||
|
3c9c379857 | ||
|
6c7224f4f2 | ||
|
169bfe469c | ||
|
f344a3fb4d | ||
|
054defb568 | ||
|
9d935b3c69 | ||
|
e2799bdab6 | ||
|
74be521392 | ||
|
40a955eef6 | ||
|
4df5e1c1ea | ||
|
5748eeff08 | ||
|
dacd49dde9 | ||
|
76c6662ef9 | ||
|
c92c41770d | ||
|
7568388d19 | ||
|
d2081b8381 | ||
|
193c1032c2 | ||
|
f6668b3189 | ||
|
f58bf21527 | ||
|
baf63b327a | ||
|
cd3092c0ca | ||
|
b4f5578f4d | ||
|
fe0be22e75 | ||
|
3df044a663 | ||
|
83b6f918a9 | ||
|
e022e54c64 | ||
|
77c267b49e | ||
|
6f08efdc90 | ||
|
fb13348356 | ||
|
8da1b779ac | ||
|
c7fd119bec | ||
|
e4dc80dae3 | ||
|
d2ee7164b6 | ||
|
24f4090afa | ||
|
f90dc28c5b | ||
|
70bf3379df | ||
|
f0fce39119 | ||
|
8af502d493 | ||
|
9d706078c8 |
38 changed files with 220 additions and 119 deletions
|
@ -1,68 +0,0 @@
|
|||
// Copyright 2006 Nemanja Trifunovic
|
||||
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person or organization
|
||||
obtaining a copy of the software and accompanying documentation covered by
|
||||
this license (the "Software") to use, reproduce, display, distribute,
|
||||
execute, and transmit the Software, and to prepare derivative works of the
|
||||
Software, and to permit third-parties to whom the Software is furnished to
|
||||
do so, all subject to the following:
|
||||
|
||||
The copyright notices in the Software and this entire statement, including
|
||||
the above license grant, this restriction and the following disclaimer,
|
||||
must be included in all copies of the Software, in whole or in part, and
|
||||
all derivative works of the Software, unless such copies or derivative
|
||||
works are solely in the form of machine-executable object code generated by
|
||||
a source language processor.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
|
||||
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
|
||||
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef id71B1E0983F3D4F7BAD0C091C4569AB37
|
||||
#define id71B1E0983F3D4F7BAD0C091C4569AB37
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
namespace utf8 {
|
||||
// Base for the exceptions that may be thrown from the library
|
||||
class exception : public ::std::exception {
|
||||
};
|
||||
|
||||
// Exceptions that may be thrown from the library functions.
|
||||
class invalid_code_point : public exception {
|
||||
uint32_t cp;
|
||||
public:
|
||||
invalid_code_point(uint32_t cp) : cp(cp) {}
|
||||
virtual const char* what() const throw() { return "Invalid code point"; }
|
||||
uint32_t code_point() const {return cp;}
|
||||
};
|
||||
|
||||
class invalid_utf8 : public exception {
|
||||
uint8_t u8;
|
||||
public:
|
||||
invalid_utf8 (uint8_t u) : u8(u) {}
|
||||
virtual const char* what() const throw() { return "Invalid UTF-8"; }
|
||||
uint8_t utf8_octet() const {return u8;}
|
||||
};
|
||||
|
||||
class invalid_utf16 : public exception {
|
||||
uint16_t u16;
|
||||
public:
|
||||
invalid_utf16 (uint16_t u) : u16(u) {}
|
||||
virtual const char* what() const throw() { return "Invalid UTF-16"; }
|
||||
uint16_t utf16_word() const {return u16;}
|
||||
};
|
||||
|
||||
class not_enough_room : public exception {
|
||||
public:
|
||||
virtual const char* what() const throw() { return "Not enough space"; }
|
||||
};
|
||||
} //namespace utf8
|
||||
|
||||
#endif
|
5
v2_0/samples/Makefile
Normal file
5
v2_0/samples/Makefile
Normal file
|
@ -0,0 +1,5 @@
|
|||
# Builds the documentation sample program from docsample.cpp.
# The library header is listed as a prerequisite so that editing it
# triggers a rebuild of the sample.
CC = g++
CFLAGS = -g -Wall -pedantic

# $< is the first prerequisite (docsample.cpp), $@ is the target name.
docsample: docsample.cpp ../source/utf8.h
	$(CC) $(CFLAGS) $< -o $@
|
|
@ -29,12 +29,44 @@ DEALINGS IN THE SOFTWARE.
|
|||
#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
|
||||
|
||||
#include "core.h"
|
||||
#include "exception.h"
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <stdexcept>
|
||||
|
||||
namespace utf8
|
||||
{
|
||||
// Base for the exceptions that may be thrown from the library
|
||||
class exception : public ::std::exception {
|
||||
};
|
||||
|
||||
// Exceptions that may be thrown from the library functions.
|
||||
class invalid_code_point : public exception {
|
||||
uint32_t cp;
|
||||
public:
|
||||
invalid_code_point(uint32_t cp) : cp(cp) {}
|
||||
virtual const char* what() const throw() { return "Invalid code point"; }
|
||||
uint32_t code_point() const {return cp;}
|
||||
};
|
||||
|
||||
class invalid_utf8 : public exception {
|
||||
uint8_t u8;
|
||||
public:
|
||||
invalid_utf8 (uint8_t u) : u8(u) {}
|
||||
virtual const char* what() const throw() { return "Invalid UTF-8"; }
|
||||
uint8_t utf8_octet() const {return u8;}
|
||||
};
|
||||
|
||||
class invalid_utf16 : public exception {
|
||||
uint16_t u16;
|
||||
public:
|
||||
invalid_utf16 (uint16_t u) : u16(u) {}
|
||||
virtual const char* what() const throw() { return "Invalid UTF-16"; }
|
||||
uint16_t utf16_word() const {return u16;}
|
||||
};
|
||||
|
||||
class not_enough_room : public exception {
|
||||
public:
|
||||
virtual const char* what() const throw() { return "Not enough space"; }
|
||||
};
|
||||
|
||||
/// The library API - functions intended to be called by the users
|
||||
|
||||
template <typename octet_iterator>
|
||||
|
@ -231,53 +263,9 @@ namespace utf8
|
|||
return result;
|
||||
}
|
||||
|
||||
// Error policies for the iterator class
|
||||
template <typename I>
|
||||
class ErrorPolicyThrow {
|
||||
public:
|
||||
static void check_in_range(const I& it, const I& range_start, const I& range_end)
|
||||
{
|
||||
if (it < range_start || it > range_end)
|
||||
throw std::out_of_range("Invalid utf-8 iterator position");
|
||||
}
|
||||
static void check_same_range(const I& range_start_a, const I& range_start_b, const I& range_end_a, const I& range_end_b)
|
||||
{
|
||||
if (range_start_a != range_start_b || range_end_a != range_end_b)
|
||||
throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
|
||||
}
|
||||
};
|
||||
template <typename I>
|
||||
class ErrorPolicyAssert {
|
||||
public:
|
||||
static void check_in_range(const I& it, const I& range_start, const I& range_end)
|
||||
{
|
||||
#if defined(NDEBUG)
|
||||
(void)it;
|
||||
(void)range_start;
|
||||
(void)range_end;
|
||||
#else
|
||||
assert(it >= range_start && it <= range_end);
|
||||
#endif
|
||||
}
|
||||
static void check_same_range(const I& range_start_a, const I& range_start_b, const I& range_end_a, const I& range_end_b)
|
||||
{
|
||||
#if defined(NDEBUG)
|
||||
(void)range_start_a;
|
||||
(void)range_start_b;
|
||||
(void)range_end_a;
|
||||
(void)range_end_b;
|
||||
#else
|
||||
assert(range_start_a == range_start_b && range_end_a == range_end_b);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
// The iterator class
|
||||
template <
|
||||
typename octet_iterator,
|
||||
typename error_policy=ErrorPolicyThrow<octet_iterator>
|
||||
>
|
||||
class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t, std::ptrdiff_t, uint32_t*, uint32_t> {
|
||||
template <typename octet_iterator>
|
||||
class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
|
||||
octet_iterator it;
|
||||
octet_iterator range_start;
|
||||
octet_iterator range_end;
|
||||
|
@ -288,7 +276,8 @@ namespace utf8
|
|||
const octet_iterator& range_end) :
|
||||
it(octet_it), range_start(range_start), range_end(range_end)
|
||||
{
|
||||
error_policy::check_in_range(it, range_start, range_end);
|
||||
if (it < range_start || it > range_end)
|
||||
throw std::out_of_range("Invalid utf-8 iterator position");
|
||||
}
|
||||
// the default "big three" are OK
|
||||
octet_iterator base () const { return it; }
|
||||
|
@ -299,7 +288,8 @@ namespace utf8
|
|||
}
|
||||
bool operator == (const iterator& rhs) const
|
||||
{
|
||||
error_policy::check_same_range(range_start, rhs.range_start, range_end, rhs.range_end);
|
||||
if (range_start != rhs.range_start || range_end != rhs.range_end)
|
||||
throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
|
||||
return (it == rhs.it);
|
||||
}
|
||||
bool operator != (const iterator& rhs) const
|
||||
|
@ -333,3 +323,5 @@ namespace utf8
|
|||
} // namespace utf8
|
||||
|
||||
#endif //header guard
|
||||
|
||||
|
147
v3_0/src/utf8.h
Normal file
147
v3_0/src/utf8.h
Normal file
|
@ -0,0 +1,147 @@
|
|||
// Copyright 2006-2013 Nemanja Trifunovic
|
||||
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person or organization
|
||||
obtaining a copy of the software and accompanying documentation covered by
|
||||
this license (the "Software") to use, reproduce, display, distribute,
|
||||
execute, and transmit the Software, and to prepare derivative works of the
|
||||
Software, and to permit third-parties to whom the Software is furnished to
|
||||
do so, all subject to the following:
|
||||
|
||||
The copyright notices in the Software and this entire statement, including
|
||||
the above license grant, this restriction and the following disclaimer,
|
||||
must be included in all copies of the Software, in whole or in part, and
|
||||
all derivative works of the Software, unless such copies or derivative
|
||||
works are solely in the form of machine-executable object code generated by
|
||||
a source language processor.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
|
||||
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
|
||||
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
|
||||
#define UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
|
||||
|
||||
// By default, utf8 cpp requires C++ Standard Library strings and exceptions
|
||||
// The following macros can be used to change the default behavior
|
||||
|
||||
// #define UTF_CPP_NO_STD_STRING
|
||||
// #define UTF_CPP_NO_EXCEPTIONS
|
||||
|
||||
#ifndef UTF_CPP_NO_EXCEPTIONS
|
||||
#include <stdexcept>
|
||||
|
||||
#ifndef UTF_CPP_NO_STD_STRING
|
||||
#include <string>
|
||||
#include <iterator>
|
||||
|
||||
#endif // #ifndef UTF_CPP_NO_STD_STRING
|
||||
#endif // #ifndef UTF_CPP_NO_EXCEPTIONS
|
||||
|
||||
namespace utf8
{

    /// Error codes - used internally, and reported through the `error`
    /// out-parameter of the non-throwing overloads.
    enum class utf_error {UTF8_OK, NOT_ENOUGH_ROOM, INVALID_LEAD,
                          INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT};

#ifndef UTF_CPP_NO_EXCEPTIONS
    /// Base for the exceptions that may be thrown from the library.
    class exception : public ::std::exception {
    };

    // Exceptions that may be thrown from the library functions.

    /// Thrown when a value that is not a valid Unicode code point is
    /// passed to an encoding function. Carries the offending value.
    class invalid_code_point : public exception {
        char32_t cp;
    public:
        invalid_code_point(char32_t cp) : cp(cp) {}
        const char* what() const noexcept override { return "Invalid code point"; }
        /// The rejected code point value.
        char32_t code_point() const { return cp; }
    };

#endif // #ifndef UTF_CPP_NO_EXCEPTIONS

    // Helper code - not intended to be directly called by the library users. May be changed at any time
    namespace internal
    {
        // Unicode constants
        // Leading (high) surrogates: 0xd800 - 0xdbff
        // Trailing (low) surrogates: 0xdc00 - 0xdfff
        const char32_t LEAD_SURROGATE_MIN  = 0x0000d800;
        const char32_t LEAD_SURROGATE_MAX  = 0x0000dbff;
        const char32_t TRAIL_SURROGATE_MIN = 0x0000dc00;
        const char32_t TRAIL_SURROGATE_MAX = 0x0000dfff;

        // Maximum valid value for a Unicode code point
        const char32_t CODE_POINT_MAX      = 0x0010ffff;

        /// True when cp lies anywhere in the surrogate range
        /// (lead and trail ranges are contiguous: 0xd800 - 0xdfff).
        inline bool is_surrogate(char32_t cp)
        {
            return (cp >= LEAD_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
        }

        /// True when cp is at most CODE_POINT_MAX and is not a surrogate.
        inline bool is_code_point_valid(char32_t cp)
        {
            return (cp <= CODE_POINT_MAX && !utf8::internal::is_surrogate(cp));
        }
    } // namespace internal

    /// The library API - functions intended to be called by the users

    /// Encodes cp as UTF-8 and writes the octets through result.
    ///
    /// @param cp      code point to encode
    /// @param result  output iterator that receives 1 to 4 octets
    /// @param error   set to UTF8_OK on success, or INVALID_CODE_POINT when
    ///                cp is rejected (in which case nothing is written)
    /// @return iterator one past the last octet written; the unmodified
    ///         result when cp is invalid
    template <typename octet_iterator>
    octet_iterator append(char32_t cp, octet_iterator result, utf_error& error)
    {
        if (!utf8::internal::is_code_point_valid(cp)) {
            error = utf8::utf_error::INVALID_CODE_POINT;
            return result;
        }

        // Always report success explicitly so that a stale or uninitialised
        // value in the caller's variable cannot be mistaken for a result.
        error = utf8::utf_error::UTF8_OK;

        if (cp < 0x80)                        // one octet
            *(result++) = static_cast<char>(cp);
        else if (cp < 0x800) {                // two octets
            *(result++) = static_cast<char>((cp >> 6)            | 0xc0);
            *(result++) = static_cast<char>((cp & 0x3f)          | 0x80);
        }
        else if (cp < 0x10000) {              // three octets
            *(result++) = static_cast<char>((cp >> 12)           | 0xe0);
            *(result++) = static_cast<char>(((cp >> 6) & 0x3f)   | 0x80);
            *(result++) = static_cast<char>((cp & 0x3f)          | 0x80);
        }
        else {                                // four octets
            *(result++) = static_cast<char>((cp >> 18)           | 0xf0);
            *(result++) = static_cast<char>(((cp >> 12) & 0x3f)  | 0x80);
            *(result++) = static_cast<char>(((cp >> 6) & 0x3f)   | 0x80);
            *(result++) = static_cast<char>((cp & 0x3f)          | 0x80);
        }
        return result;
    }

#ifndef UTF_CPP_NO_EXCEPTIONS
    /// Throwing variant of append.
    ///
    /// @param cp      code point to encode
    /// @param result  output iterator that receives 1 to 4 octets
    /// @return iterator one past the last octet written
    /// @throws utf8::invalid_code_point when cp is rejected
    template <typename octet_iterator>
    octet_iterator append(char32_t cp, octet_iterator result)
    {
        utf8::utf_error err {utf8::utf_error::UTF8_OK};
        // Keep the advanced iterator: the three-argument overload takes
        // result by value, so the local copy here is not moved by the call.
        result = utf8::append(cp, result, err);
        if (err != utf8::utf_error::UTF8_OK)
            throw utf8::invalid_code_point(cp);
        return result;
    }

#ifndef UTF_CPP_NO_STD_STRING
    /// Appends the UTF-8 encoding of cp to the end of str.
    /// @throws utf8::invalid_code_point when cp is rejected
    inline void append(char32_t cp, std::string& str)
    {
        utf8::append(cp, std::back_inserter(str));
    }
#endif // #ifndef UTF_CPP_NO_STD_STRING
#endif // #ifndef UTF_CPP_NO_EXCEPTIONS

} // namespace utf8
|
||||
|
||||
#endif // header guard
|
||||
|
6
v3_0/tests/Makefile
Normal file
6
v3_0/tests/Makefile
Normal file
|
@ -0,0 +1,6 @@
|
|||
# Builds the unit-test binary from unit.cpp and runs it.
# Links against the Boost unit test framework and compiles as C++11.
CC = g++
CFLAGS = -g -Wall --std=c++11

# The recipe produces "unit", never a file called "smoketest". Declaring the
# target phony keeps a stray file of that name from suppressing the
# build-and-run step.
.PHONY: smoketest

smoketest: unit.cpp ../src/utf8.h
	$(CC) $(CFLAGS) unit.cpp -ounit -lboost_unit_test_framework
	./unit
|
19
v3_0/tests/unit.cpp
Normal file
19
v3_0/tests/unit.cpp
Normal file
|
@ -0,0 +1,19 @@
|
|||
#define BOOST_TEST_DYN_LINK
|
||||
#define BOOST_TEST_MODULE UTF8_CPP_UNIT
|
||||
#include <boost/test/unit_test.hpp>
|
||||
|
||||
#include "../src/utf8.h"
|
||||
using namespace std;
|
||||
|
||||
// Exercises utf8::append(char32_t, std::string&) for each encoded length
// (one to four octets) and checks that invalid code points are rejected
// with utf8::invalid_code_point without modifying the output string.
BOOST_AUTO_TEST_CASE(append)
{
    std::string s;

    // One octet: U+0061
    BOOST_CHECK_NO_THROW (utf8::append(U'a', s));
    BOOST_CHECK (s.length() == 1 && s[0] == 'a');

    // Two octets: U+0448
    s.erase();
    BOOST_CHECK_NO_THROW (utf8::append(U'\U00000448', s));
    BOOST_CHECK (s.length() == 2 && s[0] == '\xd1' && s[1] == '\x88');

    // Three octets: U+65E5
    s.erase();
    BOOST_CHECK_NO_THROW (utf8::append(U'\U000065e5', s));
    BOOST_CHECK (s.length() == 3 && s[0] == '\xe6' && s[1] == '\x97' && s[2] == '\xa5');

    // Four octets: U+10346
    s.erase();
    BOOST_CHECK_NO_THROW (utf8::append(U'\U00010346', s));
    BOOST_CHECK (s.length() == 4 && s[0] == '\xf0' && s[1] == '\x90' && s[2] == '\x8d' && s[3] == '\x86');

    // A surrogate and a value above U+10FFFF must throw and write nothing.
    s.erase();
    BOOST_CHECK_THROW (utf8::append(static_cast<char32_t>(0xd800), s), utf8::invalid_code_point);
    BOOST_CHECK_THROW (utf8::append(static_cast<char32_t>(0x110000), s), utf8::invalid_code_point);
    BOOST_CHECK (s.empty());
}
|
||||
|
||||
|
Loading…
Reference in a new issue