From 90968e75cd2e051e85db7e7efdbe9f4e06adbf88 Mon Sep 17 00:00:00 2001 From: King_DuckZ Date: Fri, 20 Jun 2014 19:04:05 +0200 Subject: [PATCH] Conversion to gtest of the utf8reader test. Note that the unchecked part has been removed as the "unchecked" code is going to be removed soon. --- test/unit/tests/CMakeLists.txt | 17 ++- test/unit/tests/src/unit_config.h.in | 3 + test/unit/tests/src/utf8reader.cpp | 84 +++++++++++++ test_drivers/Makefile | 7 +- test_drivers/utf8reader/Makefile | 5 - test_drivers/utf8reader/utf8reader.cpp | 160 ------------------------- 6 files changed, 100 insertions(+), 176 deletions(-) create mode 100644 test/unit/tests/src/utf8reader.cpp delete mode 100644 test_drivers/utf8reader/Makefile delete mode 100644 test_drivers/utf8reader/utf8reader.cpp diff --git a/test/unit/tests/CMakeLists.txt b/test/unit/tests/CMakeLists.txt index bc25294..8ee5091 100644 --- a/test/unit/tests/CMakeLists.txt +++ b/test/unit/tests/CMakeLists.txt @@ -1,11 +1,15 @@ project(unit CXX) -set(PATH_UTF8_INVALID_TXT "${UNITTEST_DATA_DIR}/negative/utf8_invalid.txt") -configure_file( - "${PATH_UTF8_INVALID_TXT}" - "${PROJECT_BINARY_DIR}/utf8_invalid.txt" - COPYONLY -) +set(PATH_UTF8_INVALID_TXT "${PROJECT_BINARY_DIR}/utf8_invalid.txt") +set(PATH_UTF8_VALID1_TXT "${PROJECT_BINARY_DIR}/quickbrown.txt") +set(PATH_UTF8_VALID2_HTML "${PROJECT_BINARY_DIR}/Unicode_transcriptions.html") +set(PATH_UTF8_VALID3_TXT "${PROJECT_BINARY_DIR}/UTF-8-demo.txt") + +configure_file("${UNITTEST_DATA_DIR}/negative/utf8_invalid.txt" "${PATH_UTF8_INVALID_TXT}" COPYONLY) +configure_file("${UNITTEST_DATA_DIR}/utf8samples/quickbrown.txt" "${PATH_UTF8_VALID1_TXT}" COPYONLY) +configure_file("${UNITTEST_DATA_DIR}/utf8samples/Unicode_transcriptions.html" "${PATH_UTF8_VALID2_HTML}" COPYONLY) +configure_file("${UNITTEST_DATA_DIR}/utf8samples/UTF-8-demo.txt" "${PATH_UTF8_VALID3_TXT}" COPYONLY) + configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/src/${PROJECT_NAME}_config.h.in" 
"${PROJECT_BINARY_DIR}/${PROJECT_NAME}_config.h" @@ -18,6 +22,7 @@ include_directories( add_executable(${PROJECT_NAME} ${GTEST_MAIN_CPP} src/negative.cpp + src/utf8reader.cpp ) target_link_libraries(${PROJECT_NAME} diff --git a/test/unit/tests/src/unit_config.h.in b/test/unit/tests/src/unit_config.h.in index 66c97ed..2d47a82 100644 --- a/test/unit/tests/src/unit_config.h.in +++ b/test/unit/tests/src/unit_config.h.in @@ -2,5 +2,8 @@ #define idAC5D2FB2938B4443A35A6841A057D467 #define PATH_UTF8_INVALID_TXT "@PATH_UTF8_INVALID_TXT@" +#define PATH_UTF8_VALID1_TXT "@PATH_UTF8_VALID1_TXT@" +#define PATH_UTF8_VALID2_HTML "@PATH_UTF8_VALID2_HTML@" +#define PATH_UTF8_VALID3_TXT "@PATH_UTF8_VALID3_TXT@" #endif diff --git a/test/unit/tests/src/utf8reader.cpp b/test/unit/tests/src/utf8reader.cpp new file mode 100644 index 0000000..25f2f82 --- /dev/null +++ b/test/unit/tests/src/utf8reader.cpp @@ -0,0 +1,84 @@ +#include "utf8.h" +#include "unit_config.h" +#include +#include +#include +#include +#include + +namespace { + void TestReadingFile (const char* parSourcePath) { + // Open the test file + std::ifstream fs8(parSourcePath); + ASSERT_TRUE(fs8.is_open()); + + // Read it line by line + unsigned int line_count = 0; + char byte; + while (!fs8.eof()) { + std::string line; + while ((byte = static_cast(fs8.get())) != '\n' && !fs8.eof()) + line.push_back(byte); + + line_count++; + // Play around with each line and convert it to utf16 + std::string::iterator line_start = line.begin(); + std::string::iterator line_end = line.end(); + line_end = utf8::find_invalid(line_start, line_end); + EXPECT_EQ(line_end, line.end()) << "Line " << line_count << ": Invalid utf-8 at byte " << int(line.end() - line_end); + + // Convert it to utf-16 and write to the file + std::vector utf16_line; + utf8::utf8to16(line_start, line_end, std::back_inserter(utf16_line)); + + // Back to utf-8 and compare it to the original line. 
+ std::string back_to_utf8; + utf8::utf16to8(utf16_line.begin(), utf16_line.end(), std::back_inserter(back_to_utf8)); + EXPECT_EQ(back_to_utf8.compare(std::string(line_start, line_end)), 0) <<"Line " << line_count << ": Conversion to UTF-16 and back failed"; + + // Now, convert it to utf-32, back to utf-8 and compare + std::vector utf32_line; + utf8::utf8to32(line_start, line_end, std::back_inserter(utf32_line)); + back_to_utf8.clear(); + utf8::utf32to8(utf32_line.begin(), utf32_line.end(), std::back_inserter(back_to_utf8)); + EXPECT_EQ(back_to_utf8.compare(std::string(line_start, line_end)), 0) << "Line " << line_count << ": Conversion to UTF-32 and back failed"; + + // Now, iterate and back + unsigned char_count = 0; + std::string::iterator it = line_start; + while (it != line_end) { + unsigned int next_cp = utf8::peek_next(it, line_end); + EXPECT_EQ(utf8::next(it, line_end), next_cp) << "Line " << line_count << ": Error: peek_next gave a different result than next"; + char_count++; + } + EXPECT_EQ(char_count, utf32_line.size()) << "Line " << line_count << ": Error in iterating with next - wrong number of characters"; + + std::string::iterator adv_it = line_start; + utf8::advance(adv_it, char_count, line_end); + EXPECT_EQ(adv_it, line_end) << "Line " << line_count << ": Error in advance function"; + + EXPECT_EQ(std::string::size_type(utf8::distance(line_start, line_end)), char_count) << "Line " << line_count << ": Error in distance function"; + + while (it != line_start) { + utf8::previous(it, line.rend().base()); + char_count--; + } + EXPECT_EQ(char_count, 0) << "Line " << line_count << ": Error in iterating with previous - wrong number of characters"; + + // Try utf8::iterator + utf8::iterator u8it(line_start, line_start, line_end); + EXPECT_FALSE(not utf32_line.empty() and *u8it != utf32_line.at(0)) << "Line " << line_count << ": Error in utf::iterator * operator"; + const size_t calculatedDist = std::distance(u8it, utf8::iterator(line_end, line_start, 
line_end)); + EXPECT_EQ(calculatedDist, static_cast(utf32_line.size())) <<"Line " << line_count << ": Error in using utf::iterator with std::distance - wrong number of characters"; + + std::advance(u8it, utf32_line.size()); + EXPECT_EQ(u8it, utf8::iterator(line_end, line_start, line_end)) << "Line " << line_count << ": Error in using utf::iterator with std::advance"; + } + } +} //unnamed namespace + +TEST(Utf8, Reader) { + TestReadingFile(PATH_UTF8_VALID1_TXT); + TestReadingFile(PATH_UTF8_VALID2_HTML); + TestReadingFile(PATH_UTF8_VALID3_TXT); +} diff --git a/test_drivers/Makefile b/test_drivers/Makefile index 9fa1482..6d919a2 100644 --- a/test_drivers/Makefile +++ b/test_drivers/Makefile @@ -1,7 +1,7 @@ CC = g++ CFLAGS = -g -all: smoketest regressiontest utf8readertest +all: smoketest regressiontest smoketest: cd smoke_test && $(MAKE) $@ @@ -9,8 +9,5 @@ smoketest: regressiontest: cd regression_tests && $(MAKE) $@ -utf8readertest: - cd utf8reader && $(MAKE) $@ - clean: - rm smoke_test/smoketest regression_tests/regressiontest utf8reader/utf8reader + rm smoke_test/smoketest regression_tests/regressiontest diff --git a/test_drivers/utf8reader/Makefile b/test_drivers/utf8reader/Makefile deleted file mode 100644 index 29a9aa8..0000000 --- a/test_drivers/utf8reader/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -CC = g++ -CFLAGS = -g -Wall -pedantic - -utf8readertest: utf8reader.cpp ../../source/utf8.h ../../source/utf8/core.h ../../source/utf8/checked.h ../../source/utf8/unchecked.h - $(CC) $(CFLAGS) utf8reader.cpp -o utf8reader diff --git a/test_drivers/utf8reader/utf8reader.cpp b/test_drivers/utf8reader/utf8reader.cpp deleted file mode 100644 index c88a5ee..0000000 --- a/test_drivers/utf8reader/utf8reader.cpp +++ /dev/null @@ -1,160 +0,0 @@ -#include "../../source/utf8.h" -using namespace utf8; - -#include -#include -#include -#include -using namespace std; - -int main(int argc, char** argv) -{ - if (argc != 2) { - cout << "\nUsage: utfreader filename\n"; - return 0; - } - 
const char* TEST_FILE_PATH = argv[1]; - // Open the test file - ifstream fs8(TEST_FILE_PATH); - if (!fs8.is_open()) { - cout << "Could not open " << TEST_FILE_PATH << endl; - return 0; - } - - // Read it line by line - unsigned int line_count = 0; - char byte; - while (!fs8.eof()) { - string line; - while ((byte = static_cast(fs8.get())) != '\n' && !fs8.eof()) - line.push_back(byte); - - line_count++; - // Play around with each line and convert it to utf16 - string::iterator line_start = line.begin(); - string::iterator line_end = line.end(); - line_end = find_invalid(line_start, line_end); - if (line_end != line.end()) - cout << "Line " << line_count << ": Invalid utf-8 at byte " << int(line.end() - line_end) << '\n'; - - // Convert it to utf-16 and write to the file - vector utf16_line; - utf8to16(line_start, line_end, back_inserter(utf16_line)); - - // Back to utf-8 and compare it to the original line. - string back_to_utf8; - utf16to8(utf16_line.begin(), utf16_line.end(), back_inserter(back_to_utf8)); - if (back_to_utf8.compare(string(line_start, line_end)) != 0) - cout << "Line " << line_count << ": Conversion to UTF-16 and back failed" << '\n'; - - // Now, convert it to utf-32, back to utf-8 and compare - vector utf32_line; - utf8to32(line_start, line_end, back_inserter(utf32_line)); - back_to_utf8.clear(); - utf32to8(utf32_line.begin(), utf32_line.end(), back_inserter(back_to_utf8)); - if (back_to_utf8.compare(string(line_start, line_end)) != 0) - cout << "Line " << line_count << ": Conversion to UTF-32 and back failed" << '\n'; - - // Now, iterate and back - unsigned char_count = 0; - string::iterator it = line_start; - while (it != line_end) { - unsigned int next_cp = peek_next(it, line_end); - if (next(it, line_end) != next_cp) - cout << "Line " << line_count << ": Error: peek_next gave a different result than next" << '\n'; - char_count++; - } - if (char_count != utf32_line.size()) - cout << "Line " << line_count << ": Error in iterating with next - 
wrong number of characters" << '\n'; - - string::iterator adv_it = line_start; - utf8::advance(adv_it, char_count, line_end); - if (adv_it != line_end) - cout << "Line " << line_count << ": Error in advance function" << '\n'; - - if (string::size_type(utf8::distance(line_start, line_end)) != char_count) - cout << "Line " << line_count << ": Error in distance function" << '\n'; - - while (it != line_start) { - previous(it, line.rend().base()); - char_count--; - } - if (char_count != 0) - cout << "Line " << line_count << ": Error in iterating with previous - wrong number of characters" << '\n'; - - // Try utf8::iterator - utf8::iterator u8it(line_start, line_start, line_end); - if (!utf32_line.empty() && *u8it != utf32_line.at(0)) - cout << "Line " << line_count << ": Error in utf::iterator * operator" << '\n'; - if (std::distance(u8it, utf8::iterator(line_end, line_start, line_end)) != static_cast(utf32_line.size())) - cout << "Line " << line_count << ": Error in using utf::iterator with std::distance - wrong number of characters" << '\n'; - - std::advance(u8it, utf32_line.size()); - if (u8it != utf8::iterator(line_end, line_start, line_end)) - cout << "Line " << line_count << ": Error in using utf::iterator with std::advance" << '\n'; - - - //======================== Now, the unchecked versions ====================== - // Convert it to utf-16 and compare to the checked version - vector utf16_line_unchecked; - unchecked::utf8to16(line_start, line_end, back_inserter(utf16_line_unchecked)); - - if (utf16_line != utf16_line_unchecked) - cout << "Line " << line_count << ": Error in unchecked::utf8to16" << '\n'; - - // Back to utf-8 and compare it to the original line. 
- back_to_utf8.clear(); - unchecked::utf16to8(utf16_line_unchecked.begin(), utf16_line_unchecked.end(), back_inserter(back_to_utf8)); - if (back_to_utf8.compare(string(line_start, line_end)) != 0) - cout << "Line " << line_count << ": Unchecked conversion to UTF-16 and back failed" << '\n'; - - // Now, convert it to utf-32, back to utf-8 and compare - vector utf32_line_unchecked; - unchecked::utf8to32(line_start, line_end, back_inserter(utf32_line_unchecked)); - if (utf32_line != utf32_line_unchecked) - cout << "Line " << line_count << ": Error in unchecked::utf8to32" << '\n'; - - back_to_utf8.clear(); - unchecked::utf32to8(utf32_line.begin(), utf32_line.end(), back_inserter(back_to_utf8)); - if (back_to_utf8.compare(string(line_start, line_end)) != 0) - cout << "Line " << line_count << ": Unchecked conversion to UTF-32 and back failed" << '\n'; - - // Now, iterate and back - char_count = 0; - it = line_start; - while (it != line_end) { - unsigned int next_cp = unchecked::peek_next(it); - if (unchecked::next(it) != next_cp) - cout << "Line " << line_count << ": Error: unchecked::peek_next gave a different result than unchecked::next" << '\n';; - char_count++; - } - if (char_count != utf32_line.size()) - cout << "Line " << line_count << ": Error in iterating with unchecked::next - wrong number of characters" << '\n'; - - adv_it = line_start; - utf8::unchecked::advance(adv_it, char_count); - if (adv_it != line_end) - cout << "Line " << line_count << ": Error in unchecked::advance function" << '\n'; - - if (string::size_type(utf8::unchecked::distance(line_start, line_end)) != char_count) - cout << "Line " << line_count << ": Error in unchecked::distance function" << '\n'; - - while (it != line_start) { - unchecked::previous(it); - char_count--; - } - if (char_count != 0) - cout << "Line " << line_count << ": Error in iterating with unchecked::previous - wrong number of characters" << '\n'; - - // Try utf8::unchecked::iterator - utf8::unchecked::iterator 
un_u8it(line_start); - if (!utf32_line.empty() && *un_u8it != utf32_line.at(0)) - cout << "Line " << line_count << ": Error in utf::unchecked::iterator * operator" << '\n'; - if (std::distance(un_u8it, utf8::unchecked::iterator(line_end)) != static_cast(utf32_line.size())) - cout << "Line " << line_count << ": Error in using utf::unchecked::iterator with std::distance - wrong number of characters" << '\n'; - - std::advance(un_u8it, utf32_line.size()); - if (un_u8it != utf8::unchecked::iterator(line_end)) - cout << "Line " << line_count << ": Error in using utf::unchecked::iterator with std::advance" << '\n'; - } -}