Conversion to gtest of the utf8reader test.

Note that the unchecked part of the test has been removed, as the "unchecked"
code is going to be removed soon.
This commit is contained in:
King_DuckZ 2014-06-20 19:04:05 +02:00
parent 9bc11515a2
commit 90968e75cd
6 changed files with 100 additions and 176 deletions

View file

@ -1,11 +1,15 @@
project(unit CXX)
set(PATH_UTF8_INVALID_TXT "${UNITTEST_DATA_DIR}/negative/utf8_invalid.txt")
configure_file(
"${PATH_UTF8_INVALID_TXT}"
"${PROJECT_BINARY_DIR}/utf8_invalid.txt"
COPYONLY
)
set(PATH_UTF8_INVALID_TXT "${PROJECT_BINARY_DIR}/utf8_invalid.txt")
set(PATH_UTF8_VALID1_TXT "${PROJECT_BINARY_DIR}/quickbrown.txt")
set(PATH_UTF8_VALID2_HTML "${PROJECT_BINARY_DIR}/Unicode_transcriptions.html")
set(PATH_UTF8_VALID3_TXT "${PROJECT_BINARY_DIR}/UTF-8-demo.txt")
configure_file("${UNITTEST_DATA_DIR}/negative/utf8_invalid.txt" "${PATH_UTF8_INVALID_TXT}" COPYONLY)
configure_file("${UNITTEST_DATA_DIR}/utf8samples/quickbrown.txt" "${PATH_UTF8_VALID1_TXT}" COPYONLY)
configure_file("${UNITTEST_DATA_DIR}/utf8samples/Unicode_transcriptions.html" "${PATH_UTF8_VALID2_HTML}" COPYONLY)
configure_file("${UNITTEST_DATA_DIR}/utf8samples/UTF-8-demo.txt" "${PATH_UTF8_VALID3_TXT}" COPYONLY)
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/src/${PROJECT_NAME}_config.h.in"
"${PROJECT_BINARY_DIR}/${PROJECT_NAME}_config.h"
@ -18,6 +22,7 @@ include_directories(
add_executable(${PROJECT_NAME}
${GTEST_MAIN_CPP}
src/negative.cpp
src/utf8reader.cpp
)
target_link_libraries(${PROJECT_NAME}

View file

@ -2,5 +2,8 @@
#define idAC5D2FB2938B4443A35A6841A057D467
#define PATH_UTF8_INVALID_TXT "@PATH_UTF8_INVALID_TXT@"
#define PATH_UTF8_VALID1_TXT "@PATH_UTF8_VALID1_TXT@"
#define PATH_UTF8_VALID2_HTML "@PATH_UTF8_VALID2_HTML@"
#define PATH_UTF8_VALID3_TXT "@PATH_UTF8_VALID3_TXT@"
#endif

View file

@ -0,0 +1,84 @@
#include "utf8.h"
#include "unit_config.h"
#include <gtest/gtest.h>
#include <ciso646>
#include <cstddef>
#include <fstream>
#include <iterator>
#include <string>
#include <vector>
namespace {
void TestReadingFile (const char* parSourcePath) {
// Open the test file
std::ifstream fs8(parSourcePath);
ASSERT_TRUE(fs8.is_open());
// Read it line by line
unsigned int line_count = 0;
char byte;
while (!fs8.eof()) {
std::string line;
while ((byte = static_cast<char>(fs8.get())) != '\n' && !fs8.eof())
line.push_back(byte);
line_count++;
// Play around with each line and convert it to utf16
std::string::iterator line_start = line.begin();
std::string::iterator line_end = line.end();
line_end = utf8::find_invalid(line_start, line_end);
EXPECT_EQ(line_end, line.end()) << "Line " << line_count << ": Invalid utf-8 at byte " << int(line.end() - line_end);
// Convert it to utf-16 and write to the file
std::vector<unsigned short> utf16_line;
utf8::utf8to16(line_start, line_end, std::back_inserter(utf16_line));
// Back to utf-8 and compare it to the original line.
std::string back_to_utf8;
utf8::utf16to8(utf16_line.begin(), utf16_line.end(), std::back_inserter(back_to_utf8));
EXPECT_EQ(back_to_utf8.compare(std::string(line_start, line_end)), 0) <<"Line " << line_count << ": Conversion to UTF-16 and back failed";
// Now, convert it to utf-32, back to utf-8 and compare
std::vector <unsigned> utf32_line;
utf8::utf8to32(line_start, line_end, std::back_inserter(utf32_line));
back_to_utf8.clear();
utf8::utf32to8(utf32_line.begin(), utf32_line.end(), std::back_inserter(back_to_utf8));
EXPECT_EQ(back_to_utf8.compare(std::string(line_start, line_end)), 0) << "Line " << line_count << ": Conversion to UTF-32 and back failed";
// Now, iterate and back
unsigned char_count = 0;
std::string::iterator it = line_start;
while (it != line_end) {
unsigned int next_cp = utf8::peek_next(it, line_end);
EXPECT_EQ(utf8::next(it, line_end), next_cp) << "Line " << line_count << ": Error: peek_next gave a different result than next";
char_count++;
}
EXPECT_EQ(char_count, utf32_line.size()) << "Line " << line_count << ": Error in iterating with next - wrong number of characters";
std::string::iterator adv_it = line_start;
utf8::advance(adv_it, char_count, line_end);
EXPECT_EQ(adv_it, line_end) << "Line " << line_count << ": Error in advance function";
EXPECT_EQ(std::string::size_type(utf8::distance(line_start, line_end)), char_count) << "Line " << line_count << ": Error in distance function";
while (it != line_start) {
utf8::previous(it, line.rend().base());
char_count--;
}
EXPECT_EQ(char_count, 0) << "Line " << line_count << ": Error in iterating with previous - wrong number of characters";
// Try utf8::iterator
utf8::iterator<std::string::iterator> u8it(line_start, line_start, line_end);
EXPECT_FALSE(not utf32_line.empty() and *u8it != utf32_line.at(0)) << "Line " << line_count << ": Error in utf::iterator * operator";
const size_t calculatedDist = std::distance(u8it, utf8::iterator<std::string::iterator>(line_end, line_start, line_end));
EXPECT_EQ(calculatedDist, static_cast<int>(utf32_line.size())) <<"Line " << line_count << ": Error in using utf::iterator with std::distance - wrong number of characters";
std::advance(u8it, utf32_line.size());
EXPECT_EQ(u8it, utf8::iterator<std::string::iterator>(line_end, line_start, line_end)) << "Line " << line_count << ": Error in using utf::iterator with std::advance";
}
}
} //unnamed namespace
// Runs the per-line reader checks over each of the three valid UTF-8 sample
// files, in the order VALID1, VALID2, VALID3.
TEST(Utf8, Reader) {
	const char* const sample_paths[] = {
		PATH_UTF8_VALID1_TXT,
		PATH_UTF8_VALID2_HTML,
		PATH_UTF8_VALID3_TXT
	};
	const unsigned int sample_count = sizeof(sample_paths) / sizeof(sample_paths[0]);

	for (unsigned int z = 0; z < sample_count; ++z) {
		TestReadingFile(sample_paths[z]);
	}
}

View file

@ -1,7 +1,7 @@
CC = g++
CFLAGS = -g
all: smoketest regressiontest utf8readertest
all: smoketest regressiontest
smoketest:
cd smoke_test && $(MAKE) $@
@ -9,8 +9,5 @@ smoketest:
regressiontest:
cd regression_tests && $(MAKE) $@
utf8readertest:
cd utf8reader && $(MAKE) $@
clean:
rm smoke_test/smoketest regression_tests/regressiontest utf8reader/utf8reader
rm smoke_test/smoketest regression_tests/regressiontest

View file

@ -1,5 +0,0 @@
CC = g++
CFLAGS = -g -Wall -pedantic
utf8readertest: utf8reader.cpp ../../source/utf8.h ../../source/utf8/core.h ../../source/utf8/checked.h ../../source/utf8/unchecked.h
$(CC) $(CFLAGS) utf8reader.cpp -o utf8reader

View file

@ -1,160 +0,0 @@
#include "../../source/utf8.h"
using namespace utf8;
#include <string>
#include <iostream>
#include <fstream>
#include <vector>
using namespace std;
// Standalone driver for the utf8 reader test.
//
// Expects exactly one argument: the path of the file to read. The file is
// read line by line and each line is run through the checked utf8 functions
// and then through their utf8::unchecked counterparts. Every mismatch is
// reported as one message on stdout, prefixed with the 1-based line number.
//
// The exit status is 0 on every path visible here, including the usage
// message and the "could not open" case; failures are only visible in the
// printed output.
int main(int argc, char** argv)
{
if (argc != 2) {
cout << "\nUsage: utfreader filename\n";
return 0;
}
const char* TEST_FILE_PATH = argv[1];
// Open the test file
ifstream fs8(TEST_FILE_PATH);
if (!fs8.is_open()) {
cout << "Could not open " << TEST_FILE_PATH << endl;
return 0;
}
// Read it line by line
// NOTE(review): the loops below only stop on eof(); if the stream were to
// fail without reaching end-of-file they would presumably not terminate -
// confirm.
unsigned int line_count = 0;
char byte;
while (!fs8.eof()) {
string line;
// Collect bytes up to (not including) the next '\n' or the end of the file
while ((byte = static_cast<char>(fs8.get())) != '\n' && !fs8.eof())
line.push_back(byte);
line_count++;
// Play around with each line and convert it to utf16
string::iterator line_start = line.begin();
string::iterator line_end = line.end();
// From here on line_end marks the end of the valid prefix of the line
line_end = find_invalid(line_start, line_end);
// The number printed is line.end() - line_end, i.e. how many bytes are
// left in the line starting from the first invalid one
if (line_end != line.end())
cout << "Line " << line_count << ": Invalid utf-8 at byte " << int(line.end() - line_end) << '\n';
// Convert it to utf-16 (kept in memory only; nothing is written out)
vector<unsigned short> utf16_line;
utf8to16(line_start, line_end, back_inserter(utf16_line));
// Back to utf-8 and compare it to the original line.
string back_to_utf8;
utf16to8(utf16_line.begin(), utf16_line.end(), back_inserter(back_to_utf8));
if (back_to_utf8.compare(string(line_start, line_end)) != 0)
cout << "Line " << line_count << ": Conversion to UTF-16 and back failed" << '\n';
// Now, convert it to utf-32, back to utf-8 and compare
vector <unsigned> utf32_line;
utf8to32(line_start, line_end, back_inserter(utf32_line));
back_to_utf8.clear();
utf32to8(utf32_line.begin(), utf32_line.end(), back_inserter(back_to_utf8));
if (back_to_utf8.compare(string(line_start, line_end)) != 0)
cout << "Line " << line_count << ": Conversion to UTF-32 and back failed" << '\n';
// Now, iterate and back
// Forward pass: count code points with next(), checking peek_next() agrees
unsigned char_count = 0;
string::iterator it = line_start;
while (it != line_end) {
unsigned int next_cp = peek_next(it, line_end);
if (next(it, line_end) != next_cp)
cout << "Line " << line_count << ": Error: peek_next gave a different result than next" << '\n';
char_count++;
}
if (char_count != utf32_line.size())
cout << "Line " << line_count << ": Error in iterating with next - wrong number of characters" << '\n';
string::iterator adv_it = line_start;
utf8::advance(adv_it, char_count, line_end);
if (adv_it != line_end)
cout << "Line " << line_count << ": Error in advance function" << '\n';
if (string::size_type(utf8::distance(line_start, line_end)) != char_count)
cout << "Line " << line_count << ": Error in distance function" << '\n';
// Backward pass: `it` is at line_end here; count back down to zero
while (it != line_start) {
previous(it, line.rend().base());
char_count--;
}
if (char_count != 0)
cout << "Line " << line_count << ": Error in iterating with previous - wrong number of characters" << '\n';
// Try utf8::iterator
utf8::iterator<string::iterator> u8it(line_start, line_start, line_end);
if (!utf32_line.empty() && *u8it != utf32_line.at(0))
cout << "Line " << line_count << ": Error in utf::iterator * operator" << '\n';
if (std::distance(u8it, utf8::iterator<string::iterator>(line_end, line_start, line_end)) != static_cast<int>(utf32_line.size()))
cout << "Line " << line_count << ": Error in using utf::iterator with std::distance - wrong number of characters" << '\n';
std::advance(u8it, utf32_line.size());
if (u8it != utf8::iterator<string::iterator>(line_end, line_start, line_end))
cout << "Line " << line_count << ": Error in using utf::iterator with std::advance" << '\n';
//======================== Now, the unchecked versions ======================
// Convert it to utf-16 and compare to the checked version
vector<unsigned short> utf16_line_unchecked;
unchecked::utf8to16(line_start, line_end, back_inserter(utf16_line_unchecked));
if (utf16_line != utf16_line_unchecked)
cout << "Line " << line_count << ": Error in unchecked::utf8to16" << '\n';
// Back to utf-8 and compare it to the original line.
back_to_utf8.clear();
unchecked::utf16to8(utf16_line_unchecked.begin(), utf16_line_unchecked.end(), back_inserter(back_to_utf8));
if (back_to_utf8.compare(string(line_start, line_end)) != 0)
cout << "Line " << line_count << ": Unchecked conversion to UTF-16 and back failed" << '\n';
// Now, convert it to utf-32, back to utf-8 and compare
vector <unsigned> utf32_line_unchecked;
unchecked::utf8to32(line_start, line_end, back_inserter(utf32_line_unchecked));
if (utf32_line != utf32_line_unchecked)
cout << "Line " << line_count << ": Error in unchecked::utf8to32" << '\n';
back_to_utf8.clear();
// Note: the input here is the checked utf32_line, not utf32_line_unchecked
unchecked::utf32to8(utf32_line.begin(), utf32_line.end(), back_inserter(back_to_utf8));
if (back_to_utf8.compare(string(line_start, line_end)) != 0)
cout << "Line " << line_count << ": Unchecked conversion to UTF-32 and back failed" << '\n';
// Now, iterate and back
char_count = 0;
it = line_start;
while (it != line_end) {
unsigned int next_cp = unchecked::peek_next(it);
if (unchecked::next(it) != next_cp)
cout << "Line " << line_count << ": Error: unchecked::peek_next gave a different result than unchecked::next" << '\n';;
char_count++;
}
if (char_count != utf32_line.size())
cout << "Line " << line_count << ": Error in iterating with unchecked::next - wrong number of characters" << '\n';
adv_it = line_start;
utf8::unchecked::advance(adv_it, char_count);
if (adv_it != line_end)
cout << "Line " << line_count << ": Error in unchecked::advance function" << '\n';
if (string::size_type(utf8::unchecked::distance(line_start, line_end)) != char_count)
cout << "Line " << line_count << ": Error in unchecked::distance function" << '\n';
// Backward pass with the unchecked previous(), again counting down to zero
while (it != line_start) {
unchecked::previous(it);
char_count--;
}
if (char_count != 0)
cout << "Line " << line_count << ": Error in iterating with unchecked::previous - wrong number of characters" << '\n';
// Try utf8::unchecked::iterator
utf8::unchecked::iterator<string::iterator> un_u8it(line_start);
if (!utf32_line.empty() && *un_u8it != utf32_line.at(0))
cout << "Line " << line_count << ": Error in utf::unchecked::iterator * operator" << '\n';
if (std::distance(un_u8it, utf8::unchecked::iterator<string::iterator>(line_end)) != static_cast<int>(utf32_line.size()))
cout << "Line " << line_count << ": Error in using utf::unchecked::iterator with std::distance - wrong number of characters" << '\n';
std::advance(un_u8it, utf32_line.size());
if (un_u8it != utf8::unchecked::iterator<string::iterator>(line_end))
cout << "Line " << line_count << ": Error in using utf::unchecked::iterator with std::advance" << '\n';
}
}