From 90968e75cd2e051e85db7e7efdbe9f4e06adbf88 Mon Sep 17 00:00:00 2001
From: King_DuckZ <king_duckz@gmx.com>
Date: Fri, 20 Jun 2014 19:04:05 +0200
Subject: [PATCH] Conversion to gtest of the utf8reader test.

Note that the unchecked part of the test has been dropped, as the
"unchecked" code is going to be removed soon.
---
 test/unit/tests/CMakeLists.txt         |  17 ++-
 test/unit/tests/src/unit_config.h.in   |   3 +
 test/unit/tests/src/utf8reader.cpp     |  84 +++++++++++++
 test_drivers/Makefile                  |   7 +-
 test_drivers/utf8reader/Makefile       |   5 -
 test_drivers/utf8reader/utf8reader.cpp | 160 -------------------------
 6 files changed, 100 insertions(+), 176 deletions(-)
 create mode 100644 test/unit/tests/src/utf8reader.cpp
 delete mode 100644 test_drivers/utf8reader/Makefile
 delete mode 100644 test_drivers/utf8reader/utf8reader.cpp
diff --git a/test/unit/tests/CMakeLists.txt b/test/unit/tests/CMakeLists.txt
index bc25294..8ee5091 100644
--- a/test/unit/tests/CMakeLists.txt
+++ b/test/unit/tests/CMakeLists.txt
@@ -1,11 +1,15 @@
 project(unit CXX)
 
-set(PATH_UTF8_INVALID_TXT "${UNITTEST_DATA_DIR}/negative/utf8_invalid.txt")
-configure_file(
-	"${PATH_UTF8_INVALID_TXT}"
-	"${PROJECT_BINARY_DIR}/utf8_invalid.txt"
-	COPYONLY
-)
+set(PATH_UTF8_INVALID_TXT "${PROJECT_BINARY_DIR}/utf8_invalid.txt")
+set(PATH_UTF8_VALID1_TXT "${PROJECT_BINARY_DIR}/quickbrown.txt")
+set(PATH_UTF8_VALID2_HTML "${PROJECT_BINARY_DIR}/Unicode_transcriptions.html")
+set(PATH_UTF8_VALID3_TXT "${PROJECT_BINARY_DIR}/UTF-8-demo.txt")
+
+configure_file("${UNITTEST_DATA_DIR}/negative/utf8_invalid.txt" "${PATH_UTF8_INVALID_TXT}" COPYONLY)
+configure_file("${UNITTEST_DATA_DIR}/utf8samples/quickbrown.txt" "${PATH_UTF8_VALID1_TXT}" COPYONLY)
+configure_file("${UNITTEST_DATA_DIR}/utf8samples/Unicode_transcriptions.html" "${PATH_UTF8_VALID2_HTML}" COPYONLY)
+configure_file("${UNITTEST_DATA_DIR}/utf8samples/UTF-8-demo.txt" "${PATH_UTF8_VALID3_TXT}" COPYONLY)
+
 configure_file(
 	"${CMAKE_CURRENT_SOURCE_DIR}/src/${PROJECT_NAME}_config.h.in"
 	"${PROJECT_BINARY_DIR}/${PROJECT_NAME}_config.h"
@@ -18,6 +22,7 @@ include_directories(
 add_executable(${PROJECT_NAME}
 	${GTEST_MAIN_CPP}
 	src/negative.cpp
+	src/utf8reader.cpp
 )
 
 target_link_libraries(${PROJECT_NAME}
diff --git a/test/unit/tests/src/unit_config.h.in b/test/unit/tests/src/unit_config.h.in
index 66c97ed..2d47a82 100644
--- a/test/unit/tests/src/unit_config.h.in
+++ b/test/unit/tests/src/unit_config.h.in
@@ -2,5 +2,8 @@
 #define idAC5D2FB2938B4443A35A6841A057D467
 
 #define PATH_UTF8_INVALID_TXT "@PATH_UTF8_INVALID_TXT@"
+#define PATH_UTF8_VALID1_TXT "@PATH_UTF8_VALID1_TXT@"
+#define PATH_UTF8_VALID2_HTML "@PATH_UTF8_VALID2_HTML@"
+#define PATH_UTF8_VALID3_TXT "@PATH_UTF8_VALID3_TXT@"
 
 #endif
diff --git a/test/unit/tests/src/utf8reader.cpp b/test/unit/tests/src/utf8reader.cpp
new file mode 100644
index 0000000..25f2f82
--- /dev/null
+++ b/test/unit/tests/src/utf8reader.cpp
@@ -0,0 +1,84 @@
+#include "utf8.h"
+#include "unit_config.h"
+#include <gtest/gtest.h>
+#include <ciso646>
+#include <string>
+#include <fstream>
+#include <vector>
+
+namespace {
+	// Reads the file at parSourcePath line by line and pushes each line through the checked
+	// utf8 API: find_invalid, utf-16/utf-32 round trips, next/peek_next/previous, advance,
+	// distance and utf8::iterator. Mismatches are non-fatal; only a failed open aborts the helper.
+	void TestReadingFile (const char* parSourcePath) {
+		// The helper is run on several files, so tag every failure with the file it came from.
+		SCOPED_TRACE(parSourcePath);
+
+		// Open the test file
+		std::ifstream fs8(parSourcePath);
+		ASSERT_TRUE(fs8.is_open());
+
+		// Read it line by line; getline ends the loop on EOF and on stream errors alike
+		unsigned int line_count = 0;
+		for (std::string line; std::getline(fs8, line); ) {
+			line_count++;
+			// Validate the line; the checks below only cover the prefix before the first invalid byte
+			std::string::iterator line_start = line.begin();
+			std::string::iterator line_end   = utf8::find_invalid(line_start, line.end());
+			EXPECT_EQ(line_end, line.end()) << "Line " << line_count << ": Invalid utf-8 at byte " << static_cast<int>(line_end - line_start);
+
+			// Convert it to utf-16
+			std::vector<unsigned short> utf16_line;
+			utf8::utf8to16(line_start, line_end, std::back_inserter(utf16_line));
+
+			// Back to utf-8 and compare it to the original line.
+			std::string back_to_utf8;
+			utf8::utf16to8(utf16_line.begin(), utf16_line.end(), std::back_inserter(back_to_utf8));
+			EXPECT_EQ(back_to_utf8.compare(std::string(line_start, line_end)), 0) <<"Line " << line_count << ": Conversion to UTF-16 and back failed";
+
+			// Now, convert it to utf-32, back to utf-8 and compare
+			std::vector <unsigned> utf32_line;
+			utf8::utf8to32(line_start, line_end, std::back_inserter(utf32_line));
+			back_to_utf8.clear();
+			utf8::utf32to8(utf32_line.begin(), utf32_line.end(), std::back_inserter(back_to_utf8));
+			EXPECT_EQ(back_to_utf8.compare(std::string(line_start, line_end)), 0) << "Line " << line_count << ": Conversion to UTF-32 and back failed";
+
+			// Now, iterate forward counting code points; `it` ends at line_end and is reused below
+			unsigned char_count = 0;
+			std::string::iterator it = line_start;
+			while (it != line_end) {
+				unsigned int next_cp = utf8::peek_next(it, line_end);
+				EXPECT_EQ(utf8::next(it, line_end), next_cp) << "Line " << line_count << ": Error: peek_next gave a different result than next";
+				char_count++;
+			}
+			EXPECT_EQ(char_count, utf32_line.size()) << "Line " << line_count << ": Error in iterating with next - wrong number of characters";
+			// Advancing by the counted number of code points must land exactly on line_end
+			std::string::iterator adv_it = line_start;
+			utf8::advance(adv_it, char_count, line_end);
+			EXPECT_EQ(adv_it, line_end) << "Line " << line_count << ": Error in advance function";
+
+			EXPECT_EQ(std::string::size_type(utf8::distance(line_start, line_end)), char_count) << "Line " << line_count << ": Error in distance function";
+			// Walk back from line_end; one previous() per counted code point should reach line_start
+			while (it != line_start) {
+				utf8::previous(it, line.rend().base());
+				char_count--;
+			}
+			EXPECT_EQ(char_count, 0u) << "Line " << line_count << ": Error in iterating with previous - wrong number of characters";
+
+			// Try utf8::iterator
+			utf8::iterator<std::string::iterator> u8it(line_start, line_start, line_end);
+			EXPECT_TRUE(utf32_line.empty() or *u8it == utf32_line.at(0)) << "Line " << line_count << ": Error in utf::iterator * operator";
+			const size_t calculatedDist = static_cast<size_t>(std::distance(u8it, utf8::iterator<std::string::iterator>(line_end, line_start, line_end)));
+			EXPECT_EQ(calculatedDist, utf32_line.size()) <<"Line " << line_count << ": Error in using utf::iterator with std::distance - wrong number of characters";
+
+			std::advance(u8it, utf32_line.size());
+			EXPECT_EQ(u8it, utf8::iterator<std::string::iterator>(line_end, line_start, line_end)) << "Line " << line_count << ": Error in using utf::iterator with std::advance";
+		}
+	}
+} //unnamed namespace
+
+TEST(Utf8, Reader) { // Runs TestReadingFile on each sample file; paths are macros from unit_config.h
+	TestReadingFile(PATH_UTF8_VALID1_TXT);
+	TestReadingFile(PATH_UTF8_VALID2_HTML);
+	TestReadingFile(PATH_UTF8_VALID3_TXT);
+}
diff --git a/test_drivers/Makefile b/test_drivers/Makefile
index 9fa1482..6d919a2 100644
--- a/test_drivers/Makefile
+++ b/test_drivers/Makefile
@@ -1,7 +1,7 @@
 CC = g++
 CFLAGS = -g
 
-all: smoketest regressiontest utf8readertest
+all: smoketest regressiontest
 
 smoketest:
 	cd smoke_test &&  $(MAKE) $@
@@ -9,8 +9,5 @@ smoketest:
 regressiontest:
 	cd regression_tests &&  $(MAKE) $@
 
-utf8readertest:
-	cd utf8reader &&  $(MAKE) $@
-
 clean: 
-	rm smoke_test/smoketest regression_tests/regressiontest utf8reader/utf8reader
+	rm smoke_test/smoketest regression_tests/regressiontest
diff --git a/test_drivers/utf8reader/Makefile b/test_drivers/utf8reader/Makefile
deleted file mode 100644
index 29a9aa8..0000000
--- a/test_drivers/utf8reader/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-CC = g++
-CFLAGS = -g -Wall -pedantic
-
-utf8readertest: utf8reader.cpp ../../source/utf8.h ../../source/utf8/core.h ../../source/utf8/checked.h ../../source/utf8/unchecked.h
-	$(CC) $(CFLAGS) utf8reader.cpp -o utf8reader
diff --git a/test_drivers/utf8reader/utf8reader.cpp b/test_drivers/utf8reader/utf8reader.cpp
deleted file mode 100644
index c88a5ee..0000000
--- a/test_drivers/utf8reader/utf8reader.cpp
+++ /dev/null
@@ -1,160 +0,0 @@
-#include "../../source/utf8.h"
-using namespace utf8;
-
-#include <string>
-#include <iostream>
-#include <fstream>
-#include <vector>
-using namespace std;
-
-int main(int argc, char** argv)
-{
-    if (argc != 2) {
-        cout << "\nUsage: utfreader filename\n";
-        return 0;
-    }
-    const char* TEST_FILE_PATH = argv[1];
-    // Open the test file
-    ifstream fs8(TEST_FILE_PATH);
-    if (!fs8.is_open()) {
-    cout << "Could not open " << TEST_FILE_PATH << endl;
-    return 0;
-    }
-
-    // Read it line by line
-    unsigned int line_count = 0;
-    char byte;
-    while (!fs8.eof()) {
-        string line;
-        while ((byte = static_cast<char>(fs8.get())) != '\n' && !fs8.eof()) 
-            line.push_back(byte);
-
-        line_count++;
-	// Play around with each line and convert it to utf16
-        string::iterator line_start = line.begin();
-        string::iterator line_end   = line.end();
-        line_end = find_invalid(line_start, line_end);
-        if (line_end != line.end()) 
-            cout << "Line " << line_count << ": Invalid utf-8 at byte " << int(line.end() - line_end) << '\n';
-
-        // Convert it to utf-16 and write to the file
-        vector<unsigned short> utf16_line;
-        utf8to16(line_start, line_end, back_inserter(utf16_line));
-
-        // Back to utf-8 and compare it to the original line.
-        string back_to_utf8;
-        utf16to8(utf16_line.begin(), utf16_line.end(), back_inserter(back_to_utf8));
-        if (back_to_utf8.compare(string(line_start, line_end)) != 0) 
-            cout << "Line " << line_count << ": Conversion to UTF-16 and back failed" << '\n';
-
-        // Now, convert it to utf-32, back to utf-8 and compare
-        vector <unsigned> utf32_line;
-        utf8to32(line_start, line_end, back_inserter(utf32_line));
-        back_to_utf8.clear();
-        utf32to8(utf32_line.begin(), utf32_line.end(), back_inserter(back_to_utf8));
-        if (back_to_utf8.compare(string(line_start, line_end)) != 0) 
-            cout << "Line " << line_count << ": Conversion to UTF-32 and back failed" << '\n';
-
-        // Now, iterate and back
-        unsigned char_count = 0;
-        string::iterator it = line_start;
-        while (it != line_end) {
-            unsigned int next_cp = peek_next(it, line_end);
-            if (next(it, line_end) != next_cp)
-                cout << "Line " << line_count << ": Error: peek_next gave a different result than next" << '\n';
-            char_count++;
-        }
-        if (char_count != utf32_line.size())
-            cout << "Line " << line_count << ": Error in iterating with next - wrong number of characters" << '\n';
-
-        string::iterator adv_it = line_start;
-        utf8::advance(adv_it, char_count, line_end);
-        if (adv_it != line_end)
-            cout << "Line " << line_count << ": Error in advance function" << '\n';
-
-        if (string::size_type(utf8::distance(line_start, line_end)) != char_count)
-            cout << "Line " << line_count << ": Error in distance function" << '\n';
-
-        while (it != line_start) {
-            previous(it, line.rend().base());
-            char_count--;
-        }
-        if (char_count != 0)
-            cout << "Line " << line_count << ": Error in iterating with previous - wrong number of characters" << '\n';
-
-        // Try utf8::iterator
-        utf8::iterator<string::iterator> u8it(line_start, line_start, line_end);
-        if (!utf32_line.empty() && *u8it != utf32_line.at(0))
-          cout << "Line " << line_count << ": Error in utf::iterator * operator" << '\n'; 
-        if (std::distance(u8it, utf8::iterator<string::iterator>(line_end, line_start, line_end)) != static_cast<int>(utf32_line.size()))
-          cout << "Line " << line_count << ": Error in using utf::iterator with std::distance - wrong number of characters" << '\n';
-
-        std::advance(u8it, utf32_line.size());
-        if (u8it != utf8::iterator<string::iterator>(line_end, line_start, line_end))
-          cout << "Line " << line_count << ": Error in using utf::iterator with std::advance" << '\n';
-
-
-        //======================== Now, the unchecked versions ======================
-        // Convert it to utf-16 and compare to the checked version
-        vector<unsigned short> utf16_line_unchecked;
-        unchecked::utf8to16(line_start, line_end, back_inserter(utf16_line_unchecked));
-
-        if (utf16_line != utf16_line_unchecked)
-            cout << "Line " << line_count << ": Error in unchecked::utf8to16" << '\n';
-
-        // Back to utf-8 and compare it to the original line.
-        back_to_utf8.clear();
-        unchecked::utf16to8(utf16_line_unchecked.begin(), utf16_line_unchecked.end(), back_inserter(back_to_utf8));
-        if (back_to_utf8.compare(string(line_start, line_end)) != 0) 
-            cout << "Line " << line_count << ": Unchecked conversion to UTF-16 and back failed" << '\n';
-
-        // Now, convert it to utf-32, back to utf-8 and compare
-        vector <unsigned> utf32_line_unchecked;
-        unchecked::utf8to32(line_start, line_end, back_inserter(utf32_line_unchecked));
-        if (utf32_line != utf32_line_unchecked)
-            cout << "Line " << line_count << ": Error in unchecked::utf8to32" << '\n';
-
-        back_to_utf8.clear();
-        unchecked::utf32to8(utf32_line.begin(), utf32_line.end(), back_inserter(back_to_utf8));
-        if (back_to_utf8.compare(string(line_start, line_end)) != 0) 
-            cout << "Line " << line_count << ": Unchecked conversion to UTF-32 and back failed" << '\n';
-
-        // Now, iterate and back
-        char_count = 0;
-        it = line_start;
-        while (it != line_end) {
-            unsigned int next_cp = unchecked::peek_next(it); 
-            if (unchecked::next(it) != next_cp)
-              cout << "Line " << line_count << ": Error: unchecked::peek_next gave a different result than unchecked::next" << '\n';;
-            char_count++;
-        }
-        if (char_count != utf32_line.size())
-            cout << "Line " << line_count << ": Error in iterating with unchecked::next - wrong number of characters" << '\n';
-
-        adv_it = line_start;
-        utf8::unchecked::advance(adv_it, char_count);
-        if (adv_it != line_end)
-            cout << "Line " << line_count << ": Error in unchecked::advance function" << '\n';
-
-        if (string::size_type(utf8::unchecked::distance(line_start, line_end)) != char_count)
-            cout << "Line " << line_count << ": Error in unchecked::distance function" << '\n';
-
-        while (it != line_start) {
-            unchecked::previous(it);
-            char_count--;
-        }
-        if (char_count != 0)
-            cout << "Line " << line_count << ": Error in iterating with unchecked::previous - wrong number of characters" << '\n';
-
-        // Try utf8::unchecked::iterator
-        utf8::unchecked::iterator<string::iterator> un_u8it(line_start);
-        if (!utf32_line.empty() && *un_u8it != utf32_line.at(0))
-          cout << "Line " << line_count << ": Error in utf::unchecked::iterator * operator" << '\n'; 
-        if (std::distance(un_u8it, utf8::unchecked::iterator<string::iterator>(line_end)) != static_cast<int>(utf32_line.size()))
-          cout << "Line " << line_count << ": Error in using utf::unchecked::iterator with std::distance - wrong number of characters" << '\n';
-
-        std::advance(un_u8it, utf32_line.size());
-        if (un_u8it != utf8::unchecked::iterator<string::iterator>(line_end))
-          cout << "Line " << line_count << ": Error in using utf::unchecked::iterator with std::advance" << '\n';
-    }
-}