Conversion to gtest of the utf8reader test.
Note that the unchecked part has been removed, as the "unchecked" code is going to be removed soon.
This commit is contained in:
parent
9bc11515a2
commit
90968e75cd
6 changed files with 100 additions and 176 deletions
|
@ -1,11 +1,15 @@
|
||||||
project(unit CXX)
|
project(unit CXX)
|
||||||
|
|
||||||
set(PATH_UTF8_INVALID_TXT "${UNITTEST_DATA_DIR}/negative/utf8_invalid.txt")
|
set(PATH_UTF8_INVALID_TXT "${PROJECT_BINARY_DIR}/utf8_invalid.txt")
|
||||||
configure_file(
|
set(PATH_UTF8_VALID1_TXT "${PROJECT_BINARY_DIR}/quickbrown.txt")
|
||||||
"${PATH_UTF8_INVALID_TXT}"
|
set(PATH_UTF8_VALID2_HTML "${PROJECT_BINARY_DIR}/Unicode_transcriptions.html")
|
||||||
"${PROJECT_BINARY_DIR}/utf8_invalid.txt"
|
set(PATH_UTF8_VALID3_TXT "${PROJECT_BINARY_DIR}/UTF-8-demo.txt")
|
||||||
COPYONLY
|
|
||||||
)
|
configure_file("${UNITTEST_DATA_DIR}/negative/utf8_invalid.txt" "${PATH_UTF8_INVALID_TXT}" COPYONLY)
|
||||||
|
configure_file("${UNITTEST_DATA_DIR}/utf8samples/quickbrown.txt" "${PATH_UTF8_VALID1_TXT}" COPYONLY)
|
||||||
|
configure_file("${UNITTEST_DATA_DIR}/utf8samples/Unicode_transcriptions.html" "${PATH_UTF8_VALID2_HTML}" COPYONLY)
|
||||||
|
configure_file("${UNITTEST_DATA_DIR}/utf8samples/UTF-8-demo.txt" "${PATH_UTF8_VALID3_TXT}" COPYONLY)
|
||||||
|
|
||||||
configure_file(
|
configure_file(
|
||||||
"${CMAKE_CURRENT_SOURCE_DIR}/src/${PROJECT_NAME}_config.h.in"
|
"${CMAKE_CURRENT_SOURCE_DIR}/src/${PROJECT_NAME}_config.h.in"
|
||||||
"${PROJECT_BINARY_DIR}/${PROJECT_NAME}_config.h"
|
"${PROJECT_BINARY_DIR}/${PROJECT_NAME}_config.h"
|
||||||
|
@ -18,6 +22,7 @@ include_directories(
|
||||||
add_executable(${PROJECT_NAME}
|
add_executable(${PROJECT_NAME}
|
||||||
${GTEST_MAIN_CPP}
|
${GTEST_MAIN_CPP}
|
||||||
src/negative.cpp
|
src/negative.cpp
|
||||||
|
src/utf8reader.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
target_link_libraries(${PROJECT_NAME}
|
target_link_libraries(${PROJECT_NAME}
|
||||||
|
|
|
@ -2,5 +2,8 @@
|
||||||
#define idAC5D2FB2938B4443A35A6841A057D467
|
#define idAC5D2FB2938B4443A35A6841A057D467
|
||||||
|
|
||||||
#define PATH_UTF8_INVALID_TXT "@PATH_UTF8_INVALID_TXT@"
|
#define PATH_UTF8_INVALID_TXT "@PATH_UTF8_INVALID_TXT@"
|
||||||
|
#define PATH_UTF8_VALID1_TXT "@PATH_UTF8_VALID1_TXT@"
|
||||||
|
#define PATH_UTF8_VALID2_HTML "@PATH_UTF8_VALID2_HTML@"
|
||||||
|
#define PATH_UTF8_VALID3_TXT "@PATH_UTF8_VALID3_TXT@"
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
84
test/unit/tests/src/utf8reader.cpp
Normal file
84
test/unit/tests/src/utf8reader.cpp
Normal file
|
@ -0,0 +1,84 @@
|
||||||
|
#include "utf8.h"
|
||||||
|
#include "unit_config.h"
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <ciso646>
|
||||||
|
#include <string>
|
||||||
|
#include <fstream>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
void TestReadingFile (const char* parSourcePath) {
|
||||||
|
// Open the test file
|
||||||
|
std::ifstream fs8(parSourcePath);
|
||||||
|
ASSERT_TRUE(fs8.is_open());
|
||||||
|
|
||||||
|
// Read it line by line
|
||||||
|
unsigned int line_count = 0;
|
||||||
|
char byte;
|
||||||
|
while (!fs8.eof()) {
|
||||||
|
std::string line;
|
||||||
|
while ((byte = static_cast<char>(fs8.get())) != '\n' && !fs8.eof())
|
||||||
|
line.push_back(byte);
|
||||||
|
|
||||||
|
line_count++;
|
||||||
|
// Play around with each line and convert it to utf16
|
||||||
|
std::string::iterator line_start = line.begin();
|
||||||
|
std::string::iterator line_end = line.end();
|
||||||
|
line_end = utf8::find_invalid(line_start, line_end);
|
||||||
|
EXPECT_EQ(line_end, line.end()) << "Line " << line_count << ": Invalid utf-8 at byte " << int(line.end() - line_end);
|
||||||
|
|
||||||
|
// Convert it to utf-16 and write to the file
|
||||||
|
std::vector<unsigned short> utf16_line;
|
||||||
|
utf8::utf8to16(line_start, line_end, std::back_inserter(utf16_line));
|
||||||
|
|
||||||
|
// Back to utf-8 and compare it to the original line.
|
||||||
|
std::string back_to_utf8;
|
||||||
|
utf8::utf16to8(utf16_line.begin(), utf16_line.end(), std::back_inserter(back_to_utf8));
|
||||||
|
EXPECT_EQ(back_to_utf8.compare(std::string(line_start, line_end)), 0) <<"Line " << line_count << ": Conversion to UTF-16 and back failed";
|
||||||
|
|
||||||
|
// Now, convert it to utf-32, back to utf-8 and compare
|
||||||
|
std::vector <unsigned> utf32_line;
|
||||||
|
utf8::utf8to32(line_start, line_end, std::back_inserter(utf32_line));
|
||||||
|
back_to_utf8.clear();
|
||||||
|
utf8::utf32to8(utf32_line.begin(), utf32_line.end(), std::back_inserter(back_to_utf8));
|
||||||
|
EXPECT_EQ(back_to_utf8.compare(std::string(line_start, line_end)), 0) << "Line " << line_count << ": Conversion to UTF-32 and back failed";
|
||||||
|
|
||||||
|
// Now, iterate and back
|
||||||
|
unsigned char_count = 0;
|
||||||
|
std::string::iterator it = line_start;
|
||||||
|
while (it != line_end) {
|
||||||
|
unsigned int next_cp = utf8::peek_next(it, line_end);
|
||||||
|
EXPECT_EQ(utf8::next(it, line_end), next_cp) << "Line " << line_count << ": Error: peek_next gave a different result than next";
|
||||||
|
char_count++;
|
||||||
|
}
|
||||||
|
EXPECT_EQ(char_count, utf32_line.size()) << "Line " << line_count << ": Error in iterating with next - wrong number of characters";
|
||||||
|
|
||||||
|
std::string::iterator adv_it = line_start;
|
||||||
|
utf8::advance(adv_it, char_count, line_end);
|
||||||
|
EXPECT_EQ(adv_it, line_end) << "Line " << line_count << ": Error in advance function";
|
||||||
|
|
||||||
|
EXPECT_EQ(std::string::size_type(utf8::distance(line_start, line_end)), char_count) << "Line " << line_count << ": Error in distance function";
|
||||||
|
|
||||||
|
while (it != line_start) {
|
||||||
|
utf8::previous(it, line.rend().base());
|
||||||
|
char_count--;
|
||||||
|
}
|
||||||
|
EXPECT_EQ(char_count, 0) << "Line " << line_count << ": Error in iterating with previous - wrong number of characters";
|
||||||
|
|
||||||
|
// Try utf8::iterator
|
||||||
|
utf8::iterator<std::string::iterator> u8it(line_start, line_start, line_end);
|
||||||
|
EXPECT_FALSE(not utf32_line.empty() and *u8it != utf32_line.at(0)) << "Line " << line_count << ": Error in utf::iterator * operator";
|
||||||
|
const size_t calculatedDist = std::distance(u8it, utf8::iterator<std::string::iterator>(line_end, line_start, line_end));
|
||||||
|
EXPECT_EQ(calculatedDist, static_cast<int>(utf32_line.size())) <<"Line " << line_count << ": Error in using utf::iterator with std::distance - wrong number of characters";
|
||||||
|
|
||||||
|
std::advance(u8it, utf32_line.size());
|
||||||
|
EXPECT_EQ(u8it, utf8::iterator<std::string::iterator>(line_end, line_start, line_end)) << "Line " << line_count << ": Error in using utf::iterator with std::advance";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} //unnamed namespace
|
||||||
|
|
||||||
|
// Runs the file-reading checks of TestReadingFile over every known-good
// UTF-8 sample whose path is provided by unit_config.h.
TEST(Utf8, Reader) {
	const char* const sample_paths[] = {
		PATH_UTF8_VALID1_TXT,
		PATH_UTF8_VALID2_HTML,
		PATH_UTF8_VALID3_TXT
	};
	const unsigned int sample_count = sizeof(sample_paths) / sizeof(sample_paths[0]);

	for (unsigned int z = 0; z < sample_count; ++z) {
		// The helper's failure messages only carry a line number; the scoped
		// trace adds the file path to every failure reported while it is alive,
		// so it is clear which sample broke.
		SCOPED_TRACE(sample_paths[z]);
		TestReadingFile(sample_paths[z]);
	}
}
|
|
@ -1,7 +1,7 @@
|
||||||
CC = g++
|
CC = g++
|
||||||
CFLAGS = -g
|
CFLAGS = -g
|
||||||
|
|
||||||
all: smoketest regressiontest utf8readertest
|
all: smoketest regressiontest
|
||||||
|
|
||||||
smoketest:
|
smoketest:
|
||||||
cd smoke_test && $(MAKE) $@
|
cd smoke_test && $(MAKE) $@
|
||||||
|
@ -9,8 +9,5 @@ smoketest:
|
||||||
regressiontest:
|
regressiontest:
|
||||||
cd regression_tests && $(MAKE) $@
|
cd regression_tests && $(MAKE) $@
|
||||||
|
|
||||||
utf8readertest:
|
|
||||||
cd utf8reader && $(MAKE) $@
|
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm smoke_test/smoketest regression_tests/regressiontest utf8reader/utf8reader
|
rm smoke_test/smoketest regression_tests/regressiontest
|
||||||
|
|
|
@ -1,5 +0,0 @@
|
||||||
CC = g++
|
|
||||||
CFLAGS = -g -Wall -pedantic
|
|
||||||
|
|
||||||
utf8readertest: utf8reader.cpp ../../source/utf8.h ../../source/utf8/core.h ../../source/utf8/checked.h ../../source/utf8/unchecked.h
|
|
||||||
$(CC) $(CFLAGS) utf8reader.cpp -o utf8reader
|
|
|
@ -1,160 +0,0 @@
|
||||||
#include "../../source/utf8.h"
|
|
||||||
using namespace utf8;
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <iostream>
|
|
||||||
#include <fstream>
|
|
||||||
#include <vector>
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
int main(int argc, char** argv)
|
|
||||||
{
|
|
||||||
if (argc != 2) {
|
|
||||||
cout << "\nUsage: utfreader filename\n";
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
const char* TEST_FILE_PATH = argv[1];
|
|
||||||
// Open the test file
|
|
||||||
ifstream fs8(TEST_FILE_PATH);
|
|
||||||
if (!fs8.is_open()) {
|
|
||||||
cout << "Could not open " << TEST_FILE_PATH << endl;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read it line by line
|
|
||||||
unsigned int line_count = 0;
|
|
||||||
char byte;
|
|
||||||
while (!fs8.eof()) {
|
|
||||||
string line;
|
|
||||||
while ((byte = static_cast<char>(fs8.get())) != '\n' && !fs8.eof())
|
|
||||||
line.push_back(byte);
|
|
||||||
|
|
||||||
line_count++;
|
|
||||||
// Play around with each line and convert it to utf16
|
|
||||||
string::iterator line_start = line.begin();
|
|
||||||
string::iterator line_end = line.end();
|
|
||||||
line_end = find_invalid(line_start, line_end);
|
|
||||||
if (line_end != line.end())
|
|
||||||
cout << "Line " << line_count << ": Invalid utf-8 at byte " << int(line.end() - line_end) << '\n';
|
|
||||||
|
|
||||||
// Convert it to utf-16 and write to the file
|
|
||||||
vector<unsigned short> utf16_line;
|
|
||||||
utf8to16(line_start, line_end, back_inserter(utf16_line));
|
|
||||||
|
|
||||||
// Back to utf-8 and compare it to the original line.
|
|
||||||
string back_to_utf8;
|
|
||||||
utf16to8(utf16_line.begin(), utf16_line.end(), back_inserter(back_to_utf8));
|
|
||||||
if (back_to_utf8.compare(string(line_start, line_end)) != 0)
|
|
||||||
cout << "Line " << line_count << ": Conversion to UTF-16 and back failed" << '\n';
|
|
||||||
|
|
||||||
// Now, convert it to utf-32, back to utf-8 and compare
|
|
||||||
vector <unsigned> utf32_line;
|
|
||||||
utf8to32(line_start, line_end, back_inserter(utf32_line));
|
|
||||||
back_to_utf8.clear();
|
|
||||||
utf32to8(utf32_line.begin(), utf32_line.end(), back_inserter(back_to_utf8));
|
|
||||||
if (back_to_utf8.compare(string(line_start, line_end)) != 0)
|
|
||||||
cout << "Line " << line_count << ": Conversion to UTF-32 and back failed" << '\n';
|
|
||||||
|
|
||||||
// Now, iterate and back
|
|
||||||
unsigned char_count = 0;
|
|
||||||
string::iterator it = line_start;
|
|
||||||
while (it != line_end) {
|
|
||||||
unsigned int next_cp = peek_next(it, line_end);
|
|
||||||
if (next(it, line_end) != next_cp)
|
|
||||||
cout << "Line " << line_count << ": Error: peek_next gave a different result than next" << '\n';
|
|
||||||
char_count++;
|
|
||||||
}
|
|
||||||
if (char_count != utf32_line.size())
|
|
||||||
cout << "Line " << line_count << ": Error in iterating with next - wrong number of characters" << '\n';
|
|
||||||
|
|
||||||
string::iterator adv_it = line_start;
|
|
||||||
utf8::advance(adv_it, char_count, line_end);
|
|
||||||
if (adv_it != line_end)
|
|
||||||
cout << "Line " << line_count << ": Error in advance function" << '\n';
|
|
||||||
|
|
||||||
if (string::size_type(utf8::distance(line_start, line_end)) != char_count)
|
|
||||||
cout << "Line " << line_count << ": Error in distance function" << '\n';
|
|
||||||
|
|
||||||
while (it != line_start) {
|
|
||||||
previous(it, line.rend().base());
|
|
||||||
char_count--;
|
|
||||||
}
|
|
||||||
if (char_count != 0)
|
|
||||||
cout << "Line " << line_count << ": Error in iterating with previous - wrong number of characters" << '\n';
|
|
||||||
|
|
||||||
// Try utf8::iterator
|
|
||||||
utf8::iterator<string::iterator> u8it(line_start, line_start, line_end);
|
|
||||||
if (!utf32_line.empty() && *u8it != utf32_line.at(0))
|
|
||||||
cout << "Line " << line_count << ": Error in utf::iterator * operator" << '\n';
|
|
||||||
if (std::distance(u8it, utf8::iterator<string::iterator>(line_end, line_start, line_end)) != static_cast<int>(utf32_line.size()))
|
|
||||||
cout << "Line " << line_count << ": Error in using utf::iterator with std::distance - wrong number of characters" << '\n';
|
|
||||||
|
|
||||||
std::advance(u8it, utf32_line.size());
|
|
||||||
if (u8it != utf8::iterator<string::iterator>(line_end, line_start, line_end))
|
|
||||||
cout << "Line " << line_count << ": Error in using utf::iterator with std::advance" << '\n';
|
|
||||||
|
|
||||||
|
|
||||||
//======================== Now, the unchecked versions ======================
|
|
||||||
// Convert it to utf-16 and compare to the checked version
|
|
||||||
vector<unsigned short> utf16_line_unchecked;
|
|
||||||
unchecked::utf8to16(line_start, line_end, back_inserter(utf16_line_unchecked));
|
|
||||||
|
|
||||||
if (utf16_line != utf16_line_unchecked)
|
|
||||||
cout << "Line " << line_count << ": Error in unchecked::utf8to16" << '\n';
|
|
||||||
|
|
||||||
// Back to utf-8 and compare it to the original line.
|
|
||||||
back_to_utf8.clear();
|
|
||||||
unchecked::utf16to8(utf16_line_unchecked.begin(), utf16_line_unchecked.end(), back_inserter(back_to_utf8));
|
|
||||||
if (back_to_utf8.compare(string(line_start, line_end)) != 0)
|
|
||||||
cout << "Line " << line_count << ": Unchecked conversion to UTF-16 and back failed" << '\n';
|
|
||||||
|
|
||||||
// Now, convert it to utf-32, back to utf-8 and compare
|
|
||||||
vector <unsigned> utf32_line_unchecked;
|
|
||||||
unchecked::utf8to32(line_start, line_end, back_inserter(utf32_line_unchecked));
|
|
||||||
if (utf32_line != utf32_line_unchecked)
|
|
||||||
cout << "Line " << line_count << ": Error in unchecked::utf8to32" << '\n';
|
|
||||||
|
|
||||||
back_to_utf8.clear();
|
|
||||||
unchecked::utf32to8(utf32_line.begin(), utf32_line.end(), back_inserter(back_to_utf8));
|
|
||||||
if (back_to_utf8.compare(string(line_start, line_end)) != 0)
|
|
||||||
cout << "Line " << line_count << ": Unchecked conversion to UTF-32 and back failed" << '\n';
|
|
||||||
|
|
||||||
// Now, iterate and back
|
|
||||||
char_count = 0;
|
|
||||||
it = line_start;
|
|
||||||
while (it != line_end) {
|
|
||||||
unsigned int next_cp = unchecked::peek_next(it);
|
|
||||||
if (unchecked::next(it) != next_cp)
|
|
||||||
cout << "Line " << line_count << ": Error: unchecked::peek_next gave a different result than unchecked::next" << '\n';;
|
|
||||||
char_count++;
|
|
||||||
}
|
|
||||||
if (char_count != utf32_line.size())
|
|
||||||
cout << "Line " << line_count << ": Error in iterating with unchecked::next - wrong number of characters" << '\n';
|
|
||||||
|
|
||||||
adv_it = line_start;
|
|
||||||
utf8::unchecked::advance(adv_it, char_count);
|
|
||||||
if (adv_it != line_end)
|
|
||||||
cout << "Line " << line_count << ": Error in unchecked::advance function" << '\n';
|
|
||||||
|
|
||||||
if (string::size_type(utf8::unchecked::distance(line_start, line_end)) != char_count)
|
|
||||||
cout << "Line " << line_count << ": Error in unchecked::distance function" << '\n';
|
|
||||||
|
|
||||||
while (it != line_start) {
|
|
||||||
unchecked::previous(it);
|
|
||||||
char_count--;
|
|
||||||
}
|
|
||||||
if (char_count != 0)
|
|
||||||
cout << "Line " << line_count << ": Error in iterating with unchecked::previous - wrong number of characters" << '\n';
|
|
||||||
|
|
||||||
// Try utf8::unchecked::iterator
|
|
||||||
utf8::unchecked::iterator<string::iterator> un_u8it(line_start);
|
|
||||||
if (!utf32_line.empty() && *un_u8it != utf32_line.at(0))
|
|
||||||
cout << "Line " << line_count << ": Error in utf::unchecked::iterator * operator" << '\n';
|
|
||||||
if (std::distance(un_u8it, utf8::unchecked::iterator<string::iterator>(line_end)) != static_cast<int>(utf32_line.size()))
|
|
||||||
cout << "Line " << line_count << ": Error in using utf::unchecked::iterator with std::distance - wrong number of characters" << '\n';
|
|
||||||
|
|
||||||
std::advance(un_u8it, utf32_line.size());
|
|
||||||
if (un_u8it != utf8::unchecked::iterator<string::iterator>(line_end))
|
|
||||||
cout << "Line " << line_count << ": Error in using utf::unchecked::iterator with std::advance" << '\n';
|
|
||||||
}
|
|
||||||
}
|
|
Loading…
Add table
Reference in a new issue