Conversion to gtest of the utf8reader test.

Note that the unchecked part has been removed as the "unchecked" code is going to be removed soon.
2014-06-20 19:04:05 +02:00 · 2014-06-20 19:04:05 +02:00 · 90968e75cd
commit 90968e75cd
parent 9bc11515a2
6 changed files with 100 additions and 176 deletions
--- a/test/unit/tests/CMakeLists.txt
+++ b/test/unit/tests/CMakeLists.txt
@ -1,11 +1,15 @@
 project(unit CXX)
-set(PATH_UTF8_INVALID_TXT "${UNITTEST_DATA_DIR}/negative/utf8_invalid.txt")
+set(PATH_UTF8_INVALID_TXT "${PROJECT_BINARY_DIR}/utf8_invalid.txt")
-configure_file(
+set(PATH_UTF8_VALID1_TXT "${PROJECT_BINARY_DIR}/quickbrown.txt")
-	"${PATH_UTF8_INVALID_TXT}"
+set(PATH_UTF8_VALID2_HTML "${PROJECT_BINARY_DIR}/Unicode_transcriptions.html")
-	"${PROJECT_BINARY_DIR}/utf8_invalid.txt"
+set(PATH_UTF8_VALID3_TXT "${PROJECT_BINARY_DIR}/UTF-8-demo.txt")
-	COPYONLY
+
-)
+configure_file("${UNITTEST_DATA_DIR}/negative/utf8_invalid.txt" "${PATH_UTF8_INVALID_TXT}" COPYONLY)
 configure_file("${UNITTEST_DATA_DIR}/utf8samples/quickbrown.txt" "${PATH_UTF8_VALID1_TXT}" COPYONLY)
 configure_file("${UNITTEST_DATA_DIR}/utf8samples/Unicode_transcriptions.html" "${PATH_UTF8_VALID2_HTML}" COPYONLY)
 configure_file("${UNITTEST_DATA_DIR}/utf8samples/UTF-8-demo.txt" "${PATH_UTF8_VALID3_TXT}" COPYONLY)
 configure_file(
 	"${CMAKE_CURRENT_SOURCE_DIR}/src/${PROJECT_NAME}_config.h.in"
 	"${PROJECT_BINARY_DIR}/${PROJECT_NAME}_config.h"
@ -18,6 +22,7 @@ include_directories(
 add_executable(${PROJECT_NAME}
 	${GTEST_MAIN_CPP}
 	src/negative.cpp
 	src/utf8reader.cpp
 )
 target_link_libraries(${PROJECT_NAME}
--- a/test/unit/tests/src/unit_config.h.in
+++ b/test/unit/tests/src/unit_config.h.in
@ -2,5 +2,8 @@
 #define idAC5D2FB2938B4443A35A6841A057D467
 #define PATH_UTF8_INVALID_TXT "@PATH_UTF8_INVALID_TXT@"
 #define PATH_UTF8_VALID1_TXT "@PATH_UTF8_VALID1_TXT@"
 #define PATH_UTF8_VALID2_HTML "@PATH_UTF8_VALID2_HTML@"
 #define PATH_UTF8_VALID3_TXT "@PATH_UTF8_VALID3_TXT@"
 #endif
--- a/test/unit/tests/src/utf8reader.cpp
+++ b/test/unit/tests/src/utf8reader.cpp
@ -0,0 +1,84 @@
 #include "utf8.h"
 #include "unit_config.h"
 #include <gtest/gtest.h>
 #include <ciso646>
 #include <fstream>
 #include <iterator>
 #include <string>
 #include <vector>
 namespace {
 	void TestReadingFile (const char* parSourcePath) {
 		// Open the test file
 		std::ifstream fs8(parSourcePath);
 		ASSERT_TRUE(fs8.is_open());
 		// Read it line by line
 		unsigned int line_count = 0;
 		char byte;
 		while (!fs8.eof()) {
 			std::string line;
 			while ((byte = static_cast<char>(fs8.get())) != '\n' && !fs8.eof())
 				line.push_back(byte);
 			line_count++;
 			// Play around with each line and convert it to utf16
 			std::string::iterator line_start = line.begin();
 			std::string::iterator line_end   = line.end();
 			line_end = utf8::find_invalid(line_start, line_end);
 			EXPECT_EQ(line_end, line.end()) << "Line " << line_count << ": Invalid utf-8 at byte " << int(line.end() - line_end);
 			// Convert it to utf-16 and write to the file
 			std::vector<unsigned short> utf16_line;
 			utf8::utf8to16(line_start, line_end, std::back_inserter(utf16_line));
 			// Back to utf-8 and compare it to the original line.
 			std::string back_to_utf8;
 			utf8::utf16to8(utf16_line.begin(), utf16_line.end(), std::back_inserter(back_to_utf8));
 			EXPECT_EQ(back_to_utf8.compare(std::string(line_start, line_end)), 0) <<"Line " << line_count << ": Conversion to UTF-16 and back failed";
 			// Now, convert it to utf-32, back to utf-8 and compare
 			std::vector <unsigned> utf32_line;
 			utf8::utf8to32(line_start, line_end, std::back_inserter(utf32_line));
 			back_to_utf8.clear();
 			utf8::utf32to8(utf32_line.begin(), utf32_line.end(), std::back_inserter(back_to_utf8));
 			EXPECT_EQ(back_to_utf8.compare(std::string(line_start, line_end)), 0) << "Line " << line_count << ": Conversion to UTF-32 and back failed";
 			// Now, iterate and back
 			unsigned char_count = 0;
 			std::string::iterator it = line_start;
 			while (it != line_end) {
 				unsigned int next_cp = utf8::peek_next(it, line_end);
 				EXPECT_EQ(utf8::next(it, line_end), next_cp) << "Line " << line_count << ": Error: peek_next gave a different result than next";
 				char_count++;
 			}
 			EXPECT_EQ(char_count, utf32_line.size()) << "Line " << line_count << ": Error in iterating with next - wrong number of characters";
 			std::string::iterator adv_it = line_start;
 			utf8::advance(adv_it, char_count, line_end);
 			EXPECT_EQ(adv_it, line_end) << "Line " << line_count << ": Error in advance function";
 			EXPECT_EQ(std::string::size_type(utf8::distance(line_start, line_end)), char_count) << "Line " << line_count << ": Error in distance function";
 			while (it != line_start) {
 				utf8::previous(it, line.rend().base());
 				char_count--;
 			}
 			EXPECT_EQ(char_count, 0) << "Line " << line_count << ": Error in iterating with previous - wrong number of characters";
 			// Try utf8::iterator
 			utf8::iterator<std::string::iterator> u8it(line_start, line_start, line_end);
 			EXPECT_FALSE(not utf32_line.empty() and *u8it != utf32_line.at(0)) << "Line " << line_count << ": Error in utf::iterator * operator";
 			const size_t calculatedDist = std::distance(u8it, utf8::iterator<std::string::iterator>(line_end, line_start, line_end));
 			EXPECT_EQ(calculatedDist, static_cast<int>(utf32_line.size())) <<"Line " << line_count << ": Error in using utf::iterator with std::distance - wrong number of characters";
 			std::advance(u8it, utf32_line.size());
 			EXPECT_EQ(u8it, utf8::iterator<std::string::iterator>(line_end, line_start, line_end)) << "Line " << line_count << ": Error in using utf::iterator with std::advance";
 		}
 	}
 } //unnamed namespace
 // Feeds each configured sample file (PATH_UTF8_VALID*) to TestReadingFile, in order.
 TEST(Utf8, Reader) {
 	const char* const sample_paths[] = {
 		PATH_UTF8_VALID1_TXT,
 		PATH_UTF8_VALID2_HTML,
 		PATH_UTF8_VALID3_TXT
 	};
 	const unsigned int sample_count = sizeof(sample_paths) / sizeof(sample_paths[0]);
 	for (unsigned int z = 0; z < sample_count; ++z)
 		TestReadingFile(sample_paths[z]);
 }
--- a/test_drivers/Makefile
+++ b/test_drivers/Makefile
@ -1,7 +1,7 @@
 CC = g++
 CFLAGS = -g
-all: smoketest regressiontest utf8readertest
+all: smoketest regressiontest
 smoketest:
 	cd smoke_test &&  $(MAKE) $@
@ -9,8 +9,5 @@ smoketest:
 regressiontest:
 	cd regression_tests &&  $(MAKE) $@
 utf8readertest:
 	cd utf8reader &&  $(MAKE) $@
 clean: 
-	rm smoke_test/smoketest regression_tests/regressiontest utf8reader/utf8reader
+	rm smoke_test/smoketest regression_tests/regressiontest
--- a/test_drivers/utf8reader/Makefile
+++ b/test_drivers/utf8reader/Makefile
@ -1,5 +0,0 @@
 CC = g++
 CFLAGS = -g -Wall -pedantic
 utf8readertest: utf8reader.cpp ../../source/utf8.h ../../source/utf8/core.h ../../source/utf8/checked.h ../../source/utf8/unchecked.h
 	$(CC) $(CFLAGS) utf8reader.cpp -o utf8reader
--- a/test_drivers/utf8reader/utf8reader.cpp
+++ b/test_drivers/utf8reader/utf8reader.cpp
@ -1,160 +0,0 @@
 #include "../../source/utf8.h"
 using namespace utf8;
 #include <string>
 #include <iostream>
 #include <fstream>
 #include <vector>
 using namespace std;
 // Legacy command-line driver: reads the file named by argv[1] line by line
 // and exercises both the checked (utf8::) and unchecked (utf8::unchecked::)
 // APIs on every line. Each detected problem is printed to stdout.
 // NOTE: the process exit status does not reflect failures; the usage error
 // and the "could not open" paths both return 0, and no other path returns
 // a non-zero value.
 int main(int argc, char** argv)
 {
    // Exactly one argument (the file to read) is expected
    if (argc != 2) {
        cout << "\nUsage: utfreader filename\n";
        return 0;
    }
    const char* TEST_FILE_PATH = argv[1];
    // Open the test file
    ifstream fs8(TEST_FILE_PATH);
    if (!fs8.is_open()) {
    cout << "Could not open " << TEST_FILE_PATH << endl;
    return 0;
    }
    // Read it line by line
    unsigned int line_count = 0;
    char byte;
    while (!fs8.eof()) {
        string line;
        // Collect bytes up to (not including) the next '\n' or end of file
        while ((byte = static_cast<char>(fs8.get())) != '\n' && !fs8.eof()) 
            line.push_back(byte);
        line_count++;
 	// Play around with each line and convert it to utf16
        string::iterator line_start = line.begin();
        string::iterator line_end   = line.end();
        // From here on line_end is whatever find_invalid returned, so the
        // checks below only look at the range [line_start, line_end)
        line_end = find_invalid(line_start, line_end);
        if (line_end != line.end()) 
            cout << "Line " << line_count << ": Invalid utf-8 at byte " << int(line.end() - line_end) << '\n';
        // Convert it to utf-16 (the result is only kept in memory)
        vector<unsigned short> utf16_line;
        utf8to16(line_start, line_end, back_inserter(utf16_line));
        // Back to utf-8 and compare it to the original line.
        string back_to_utf8;
        utf16to8(utf16_line.begin(), utf16_line.end(), back_inserter(back_to_utf8));
        if (back_to_utf8.compare(string(line_start, line_end)) != 0) 
            cout << "Line " << line_count << ": Conversion to UTF-16 and back failed" << '\n';
        // Now, convert it to utf-32, back to utf-8 and compare
        vector <unsigned> utf32_line;
        utf8to32(line_start, line_end, back_inserter(utf32_line));
        back_to_utf8.clear();
        utf32to8(utf32_line.begin(), utf32_line.end(), back_inserter(back_to_utf8));
        if (back_to_utf8.compare(string(line_start, line_end)) != 0) 
            cout << "Line " << line_count << ": Conversion to UTF-32 and back failed" << '\n';
        // Now, iterate and back
        // Forward pass: count code points and cross-check peek_next against next
        unsigned char_count = 0;
        string::iterator it = line_start;
        while (it != line_end) {
            unsigned int next_cp = peek_next(it, line_end);
            if (next(it, line_end) != next_cp)
                cout << "Line " << line_count << ": Error: peek_next gave a different result than next" << '\n';
            char_count++;
        }
        if (char_count != utf32_line.size())
            cout << "Line " << line_count << ": Error in iterating with next - wrong number of characters" << '\n';
        string::iterator adv_it = line_start;
        utf8::advance(adv_it, char_count, line_end);
        if (adv_it != line_end)
            cout << "Line " << line_count << ": Error in advance function" << '\n';
        if (string::size_type(utf8::distance(line_start, line_end)) != char_count)
            cout << "Line " << line_count << ": Error in distance function" << '\n';
        // Backward pass: `it` is at line_end here; stepping back to line_start
        // must bring char_count down to zero.
        // Note: line.rend().base() is the same position as line.begin().
        while (it != line_start) {
            previous(it, line.rend().base());
            char_count--;
        }
        if (char_count != 0)
            cout << "Line " << line_count << ": Error in iterating with previous - wrong number of characters" << '\n';
        // Try utf8::iterator
        utf8::iterator<string::iterator> u8it(line_start, line_start, line_end);
        if (!utf32_line.empty() && *u8it != utf32_line.at(0))
          cout << "Line " << line_count << ": Error in utf::iterator * operator" << '\n'; 
        if (std::distance(u8it, utf8::iterator<string::iterator>(line_end, line_start, line_end)) != static_cast<int>(utf32_line.size()))
          cout << "Line " << line_count << ": Error in using utf::iterator with std::distance - wrong number of characters" << '\n';
        std::advance(u8it, utf32_line.size());
        if (u8it != utf8::iterator<string::iterator>(line_end, line_start, line_end))
          cout << "Line " << line_count << ": Error in using utf::iterator with std::advance" << '\n';
        //======================== Now, the unchecked versions ======================
        // The same sequence of checks follows, using the unchecked:: overloads
        // and comparing their output with the checked results computed above.
        // Convert it to utf-16 and compare to the checked version
        vector<unsigned short> utf16_line_unchecked;
        unchecked::utf8to16(line_start, line_end, back_inserter(utf16_line_unchecked));
        if (utf16_line != utf16_line_unchecked)
            cout << "Line " << line_count << ": Error in unchecked::utf8to16" << '\n';
        // Back to utf-8 and compare it to the original line.
        back_to_utf8.clear();
        unchecked::utf16to8(utf16_line_unchecked.begin(), utf16_line_unchecked.end(), back_inserter(back_to_utf8));
        if (back_to_utf8.compare(string(line_start, line_end)) != 0) 
            cout << "Line " << line_count << ": Unchecked conversion to UTF-16 and back failed" << '\n';
        // Now, convert it to utf-32, back to utf-8 and compare
        vector <unsigned> utf32_line_unchecked;
        unchecked::utf8to32(line_start, line_end, back_inserter(utf32_line_unchecked));
        if (utf32_line != utf32_line_unchecked)
            cout << "Line " << line_count << ": Error in unchecked::utf8to32" << '\n';
        back_to_utf8.clear();
        // Note: this converts the checked utf32_line, not utf32_line_unchecked
        unchecked::utf32to8(utf32_line.begin(), utf32_line.end(), back_inserter(back_to_utf8));
        if (back_to_utf8.compare(string(line_start, line_end)) != 0) 
            cout << "Line " << line_count << ": Unchecked conversion to UTF-32 and back failed" << '\n';
        // Now, iterate and back
        char_count = 0;
        it = line_start;
        while (it != line_end) {
            unsigned int next_cp = unchecked::peek_next(it); 
            if (unchecked::next(it) != next_cp)
              cout << "Line " << line_count << ": Error: unchecked::peek_next gave a different result than unchecked::next" << '\n';;
            char_count++;
        }
        if (char_count != utf32_line.size())
            cout << "Line " << line_count << ": Error in iterating with unchecked::next - wrong number of characters" << '\n';
        adv_it = line_start;
        utf8::unchecked::advance(adv_it, char_count);
        if (adv_it != line_end)
            cout << "Line " << line_count << ": Error in unchecked::advance function" << '\n';
        if (string::size_type(utf8::unchecked::distance(line_start, line_end)) != char_count)
            cout << "Line " << line_count << ": Error in unchecked::distance function" << '\n';
        // Backward pass with the unchecked API, mirroring the checked one above
        while (it != line_start) {
            unchecked::previous(it);
            char_count--;
        }
        if (char_count != 0)
            cout << "Line " << line_count << ": Error in iterating with unchecked::previous - wrong number of characters" << '\n';
        // Try utf8::unchecked::iterator
        utf8::unchecked::iterator<string::iterator> un_u8it(line_start);
        if (!utf32_line.empty() && *un_u8it != utf32_line.at(0))
          cout << "Line " << line_count << ": Error in utf::unchecked::iterator * operator" << '\n'; 
        if (std::distance(un_u8it, utf8::unchecked::iterator<string::iterator>(line_end)) != static_cast<int>(utf32_line.size()))
          cout << "Line " << line_count << ": Error in using utf::unchecked::iterator with std::distance - wrong number of characters" << '\n';
        std::advance(un_u8it, utf32_line.size());
        if (un_u8it != utf8::unchecked::iterator<string::iterator>(line_end))
          cout << "Line " << line_count << ": Error in using utf::unchecked::iterator with std::advance" << '\n';
    }
 }