Conversion to gtest of the utf8reader test.
Note that the unchecked part has been removed, as the "unchecked" code is going to be removed soon.
This commit is contained in:
parent
9bc11515a2
commit
90968e75cd
6 changed files with 100 additions and 176 deletions
|
@ -1,11 +1,15 @@
|
|||
project(unit CXX)
|
||||
|
||||
set(PATH_UTF8_INVALID_TXT "${UNITTEST_DATA_DIR}/negative/utf8_invalid.txt")
|
||||
configure_file(
|
||||
"${PATH_UTF8_INVALID_TXT}"
|
||||
"${PROJECT_BINARY_DIR}/utf8_invalid.txt"
|
||||
COPYONLY
|
||||
)
|
||||
set(PATH_UTF8_INVALID_TXT "${PROJECT_BINARY_DIR}/utf8_invalid.txt")
|
||||
set(PATH_UTF8_VALID1_TXT "${PROJECT_BINARY_DIR}/quickbrown.txt")
|
||||
set(PATH_UTF8_VALID2_HTML "${PROJECT_BINARY_DIR}/Unicode_transcriptions.html")
|
||||
set(PATH_UTF8_VALID3_TXT "${PROJECT_BINARY_DIR}/UTF-8-demo.txt")
|
||||
|
||||
configure_file("${UNITTEST_DATA_DIR}/negative/utf8_invalid.txt" "${PATH_UTF8_INVALID_TXT}" COPYONLY)
|
||||
configure_file("${UNITTEST_DATA_DIR}/utf8samples/quickbrown.txt" "${PATH_UTF8_VALID1_TXT}" COPYONLY)
|
||||
configure_file("${UNITTEST_DATA_DIR}/utf8samples/Unicode_transcriptions.html" "${PATH_UTF8_VALID2_HTML}" COPYONLY)
|
||||
configure_file("${UNITTEST_DATA_DIR}/utf8samples/UTF-8-demo.txt" "${PATH_UTF8_VALID3_TXT}" COPYONLY)
|
||||
|
||||
configure_file(
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/src/${PROJECT_NAME}_config.h.in"
|
||||
"${PROJECT_BINARY_DIR}/${PROJECT_NAME}_config.h"
|
||||
|
@ -18,6 +22,7 @@ include_directories(
|
|||
add_executable(${PROJECT_NAME}
|
||||
${GTEST_MAIN_CPP}
|
||||
src/negative.cpp
|
||||
src/utf8reader.cpp
|
||||
)
|
||||
|
||||
target_link_libraries(${PROJECT_NAME}
|
||||
|
|
|
@ -2,5 +2,8 @@
|
|||
#define idAC5D2FB2938B4443A35A6841A057D467
|
||||
|
||||
#define PATH_UTF8_INVALID_TXT "@PATH_UTF8_INVALID_TXT@"
|
||||
#define PATH_UTF8_VALID1_TXT "@PATH_UTF8_VALID1_TXT@"
|
||||
#define PATH_UTF8_VALID2_HTML "@PATH_UTF8_VALID2_HTML@"
|
||||
#define PATH_UTF8_VALID3_TXT "@PATH_UTF8_VALID3_TXT@"
|
||||
|
||||
#endif
|
||||
|
|
84
test/unit/tests/src/utf8reader.cpp
Normal file
84
test/unit/tests/src/utf8reader.cpp
Normal file
|
@ -0,0 +1,84 @@
|
|||
#include "utf8.h"
|
||||
#include "unit_config.h"
|
||||
#include <gtest/gtest.h>
|
||||
#include <ciso646>
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
|
||||
namespace {
|
||||
void TestReadingFile (const char* parSourcePath) {
|
||||
// Open the test file
|
||||
std::ifstream fs8(parSourcePath);
|
||||
ASSERT_TRUE(fs8.is_open());
|
||||
|
||||
// Read it line by line
|
||||
unsigned int line_count = 0;
|
||||
char byte;
|
||||
while (!fs8.eof()) {
|
||||
std::string line;
|
||||
while ((byte = static_cast<char>(fs8.get())) != '\n' && !fs8.eof())
|
||||
line.push_back(byte);
|
||||
|
||||
line_count++;
|
||||
// Play around with each line and convert it to utf16
|
||||
std::string::iterator line_start = line.begin();
|
||||
std::string::iterator line_end = line.end();
|
||||
line_end = utf8::find_invalid(line_start, line_end);
|
||||
EXPECT_EQ(line_end, line.end()) << "Line " << line_count << ": Invalid utf-8 at byte " << int(line.end() - line_end);
|
||||
|
||||
// Convert it to utf-16 and write to the file
|
||||
std::vector<unsigned short> utf16_line;
|
||||
utf8::utf8to16(line_start, line_end, std::back_inserter(utf16_line));
|
||||
|
||||
// Back to utf-8 and compare it to the original line.
|
||||
std::string back_to_utf8;
|
||||
utf8::utf16to8(utf16_line.begin(), utf16_line.end(), std::back_inserter(back_to_utf8));
|
||||
EXPECT_EQ(back_to_utf8.compare(std::string(line_start, line_end)), 0) <<"Line " << line_count << ": Conversion to UTF-16 and back failed";
|
||||
|
||||
// Now, convert it to utf-32, back to utf-8 and compare
|
||||
std::vector <unsigned> utf32_line;
|
||||
utf8::utf8to32(line_start, line_end, std::back_inserter(utf32_line));
|
||||
back_to_utf8.clear();
|
||||
utf8::utf32to8(utf32_line.begin(), utf32_line.end(), std::back_inserter(back_to_utf8));
|
||||
EXPECT_EQ(back_to_utf8.compare(std::string(line_start, line_end)), 0) << "Line " << line_count << ": Conversion to UTF-32 and back failed";
|
||||
|
||||
// Now, iterate and back
|
||||
unsigned char_count = 0;
|
||||
std::string::iterator it = line_start;
|
||||
while (it != line_end) {
|
||||
unsigned int next_cp = utf8::peek_next(it, line_end);
|
||||
EXPECT_EQ(utf8::next(it, line_end), next_cp) << "Line " << line_count << ": Error: peek_next gave a different result than next";
|
||||
char_count++;
|
||||
}
|
||||
EXPECT_EQ(char_count, utf32_line.size()) << "Line " << line_count << ": Error in iterating with next - wrong number of characters";
|
||||
|
||||
std::string::iterator adv_it = line_start;
|
||||
utf8::advance(adv_it, char_count, line_end);
|
||||
EXPECT_EQ(adv_it, line_end) << "Line " << line_count << ": Error in advance function";
|
||||
|
||||
EXPECT_EQ(std::string::size_type(utf8::distance(line_start, line_end)), char_count) << "Line " << line_count << ": Error in distance function";
|
||||
|
||||
while (it != line_start) {
|
||||
utf8::previous(it, line.rend().base());
|
||||
char_count--;
|
||||
}
|
||||
EXPECT_EQ(char_count, 0) << "Line " << line_count << ": Error in iterating with previous - wrong number of characters";
|
||||
|
||||
// Try utf8::iterator
|
||||
utf8::iterator<std::string::iterator> u8it(line_start, line_start, line_end);
|
||||
EXPECT_FALSE(not utf32_line.empty() and *u8it != utf32_line.at(0)) << "Line " << line_count << ": Error in utf::iterator * operator";
|
||||
const size_t calculatedDist = std::distance(u8it, utf8::iterator<std::string::iterator>(line_end, line_start, line_end));
|
||||
EXPECT_EQ(calculatedDist, static_cast<int>(utf32_line.size())) <<"Line " << line_count << ": Error in using utf::iterator with std::distance - wrong number of characters";
|
||||
|
||||
std::advance(u8it, utf32_line.size());
|
||||
EXPECT_EQ(u8it, utf8::iterator<std::string::iterator>(line_end, line_start, line_end)) << "Line " << line_count << ": Error in using utf::iterator with std::advance";
|
||||
}
|
||||
}
|
||||
} //unnamed namespace
|
||||
|
||||
// Feeds the three PATH_UTF8_VALID* sample files to TestReadingFile, in the
// order VALID1, VALID2, VALID3.
TEST(Utf8, Reader) {
    const char* const sample_paths[] = {
        PATH_UTF8_VALID1_TXT,
        PATH_UTF8_VALID2_HTML,
        PATH_UTF8_VALID3_TXT
    };
    const unsigned int sample_count = sizeof(sample_paths) / sizeof(sample_paths[0]);

    for (unsigned int idx = 0; idx < sample_count; ++idx) {
        TestReadingFile(sample_paths[idx]);
    }
}
|
|
@ -1,7 +1,7 @@
|
|||
CC = g++
|
||||
CFLAGS = -g
|
||||
|
||||
all: smoketest regressiontest utf8readertest
|
||||
all: smoketest regressiontest
|
||||
|
||||
smoketest:
|
||||
cd smoke_test && $(MAKE) $@
|
||||
|
@ -9,8 +9,5 @@ smoketest:
|
|||
regressiontest:
|
||||
cd regression_tests && $(MAKE) $@
|
||||
|
||||
utf8readertest:
|
||||
cd utf8reader && $(MAKE) $@
|
||||
|
||||
clean:
|
||||
rm smoke_test/smoketest regression_tests/regressiontest utf8reader/utf8reader
|
||||
rm smoke_test/smoketest regression_tests/regressiontest
|
||||
|
|
|
@ -1,5 +0,0 @@
|
|||
CC = g++
|
||||
CFLAGS = -g -Wall -pedantic
|
||||
|
||||
utf8readertest: utf8reader.cpp ../../source/utf8.h ../../source/utf8/core.h ../../source/utf8/checked.h ../../source/utf8/unchecked.h
|
||||
$(CC) $(CFLAGS) utf8reader.cpp -o utf8reader
|
|
@ -1,160 +0,0 @@
|
|||
#include "../../source/utf8.h"
|
||||
using namespace utf8;
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
using namespace std;
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if (argc != 2) {
|
||||
cout << "\nUsage: utfreader filename\n";
|
||||
return 0;
|
||||
}
|
||||
const char* TEST_FILE_PATH = argv[1];
|
||||
// Open the test file
|
||||
ifstream fs8(TEST_FILE_PATH);
|
||||
if (!fs8.is_open()) {
|
||||
cout << "Could not open " << TEST_FILE_PATH << endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Read it line by line
|
||||
unsigned int line_count = 0;
|
||||
char byte;
|
||||
while (!fs8.eof()) {
|
||||
string line;
|
||||
while ((byte = static_cast<char>(fs8.get())) != '\n' && !fs8.eof())
|
||||
line.push_back(byte);
|
||||
|
||||
line_count++;
|
||||
// Play around with each line and convert it to utf16
|
||||
string::iterator line_start = line.begin();
|
||||
string::iterator line_end = line.end();
|
||||
line_end = find_invalid(line_start, line_end);
|
||||
if (line_end != line.end())
|
||||
cout << "Line " << line_count << ": Invalid utf-8 at byte " << int(line.end() - line_end) << '\n';
|
||||
|
||||
// Convert it to utf-16 and write to the file
|
||||
vector<unsigned short> utf16_line;
|
||||
utf8to16(line_start, line_end, back_inserter(utf16_line));
|
||||
|
||||
// Back to utf-8 and compare it to the original line.
|
||||
string back_to_utf8;
|
||||
utf16to8(utf16_line.begin(), utf16_line.end(), back_inserter(back_to_utf8));
|
||||
if (back_to_utf8.compare(string(line_start, line_end)) != 0)
|
||||
cout << "Line " << line_count << ": Conversion to UTF-16 and back failed" << '\n';
|
||||
|
||||
// Now, convert it to utf-32, back to utf-8 and compare
|
||||
vector <unsigned> utf32_line;
|
||||
utf8to32(line_start, line_end, back_inserter(utf32_line));
|
||||
back_to_utf8.clear();
|
||||
utf32to8(utf32_line.begin(), utf32_line.end(), back_inserter(back_to_utf8));
|
||||
if (back_to_utf8.compare(string(line_start, line_end)) != 0)
|
||||
cout << "Line " << line_count << ": Conversion to UTF-32 and back failed" << '\n';
|
||||
|
||||
// Now, iterate and back
|
||||
unsigned char_count = 0;
|
||||
string::iterator it = line_start;
|
||||
while (it != line_end) {
|
||||
unsigned int next_cp = peek_next(it, line_end);
|
||||
if (next(it, line_end) != next_cp)
|
||||
cout << "Line " << line_count << ": Error: peek_next gave a different result than next" << '\n';
|
||||
char_count++;
|
||||
}
|
||||
if (char_count != utf32_line.size())
|
||||
cout << "Line " << line_count << ": Error in iterating with next - wrong number of characters" << '\n';
|
||||
|
||||
string::iterator adv_it = line_start;
|
||||
utf8::advance(adv_it, char_count, line_end);
|
||||
if (adv_it != line_end)
|
||||
cout << "Line " << line_count << ": Error in advance function" << '\n';
|
||||
|
||||
if (string::size_type(utf8::distance(line_start, line_end)) != char_count)
|
||||
cout << "Line " << line_count << ": Error in distance function" << '\n';
|
||||
|
||||
while (it != line_start) {
|
||||
previous(it, line.rend().base());
|
||||
char_count--;
|
||||
}
|
||||
if (char_count != 0)
|
||||
cout << "Line " << line_count << ": Error in iterating with previous - wrong number of characters" << '\n';
|
||||
|
||||
// Try utf8::iterator
|
||||
utf8::iterator<string::iterator> u8it(line_start, line_start, line_end);
|
||||
if (!utf32_line.empty() && *u8it != utf32_line.at(0))
|
||||
cout << "Line " << line_count << ": Error in utf::iterator * operator" << '\n';
|
||||
if (std::distance(u8it, utf8::iterator<string::iterator>(line_end, line_start, line_end)) != static_cast<int>(utf32_line.size()))
|
||||
cout << "Line " << line_count << ": Error in using utf::iterator with std::distance - wrong number of characters" << '\n';
|
||||
|
||||
std::advance(u8it, utf32_line.size());
|
||||
if (u8it != utf8::iterator<string::iterator>(line_end, line_start, line_end))
|
||||
cout << "Line " << line_count << ": Error in using utf::iterator with std::advance" << '\n';
|
||||
|
||||
|
||||
//======================== Now, the unchecked versions ======================
|
||||
// Convert it to utf-16 and compare to the checked version
|
||||
vector<unsigned short> utf16_line_unchecked;
|
||||
unchecked::utf8to16(line_start, line_end, back_inserter(utf16_line_unchecked));
|
||||
|
||||
if (utf16_line != utf16_line_unchecked)
|
||||
cout << "Line " << line_count << ": Error in unchecked::utf8to16" << '\n';
|
||||
|
||||
// Back to utf-8 and compare it to the original line.
|
||||
back_to_utf8.clear();
|
||||
unchecked::utf16to8(utf16_line_unchecked.begin(), utf16_line_unchecked.end(), back_inserter(back_to_utf8));
|
||||
if (back_to_utf8.compare(string(line_start, line_end)) != 0)
|
||||
cout << "Line " << line_count << ": Unchecked conversion to UTF-16 and back failed" << '\n';
|
||||
|
||||
// Now, convert it to utf-32, back to utf-8 and compare
|
||||
vector <unsigned> utf32_line_unchecked;
|
||||
unchecked::utf8to32(line_start, line_end, back_inserter(utf32_line_unchecked));
|
||||
if (utf32_line != utf32_line_unchecked)
|
||||
cout << "Line " << line_count << ": Error in unchecked::utf8to32" << '\n';
|
||||
|
||||
back_to_utf8.clear();
|
||||
unchecked::utf32to8(utf32_line.begin(), utf32_line.end(), back_inserter(back_to_utf8));
|
||||
if (back_to_utf8.compare(string(line_start, line_end)) != 0)
|
||||
cout << "Line " << line_count << ": Unchecked conversion to UTF-32 and back failed" << '\n';
|
||||
|
||||
// Now, iterate and back
|
||||
char_count = 0;
|
||||
it = line_start;
|
||||
while (it != line_end) {
|
||||
unsigned int next_cp = unchecked::peek_next(it);
|
||||
if (unchecked::next(it) != next_cp)
|
||||
cout << "Line " << line_count << ": Error: unchecked::peek_next gave a different result than unchecked::next" << '\n';;
|
||||
char_count++;
|
||||
}
|
||||
if (char_count != utf32_line.size())
|
||||
cout << "Line " << line_count << ": Error in iterating with unchecked::next - wrong number of characters" << '\n';
|
||||
|
||||
adv_it = line_start;
|
||||
utf8::unchecked::advance(adv_it, char_count);
|
||||
if (adv_it != line_end)
|
||||
cout << "Line " << line_count << ": Error in unchecked::advance function" << '\n';
|
||||
|
||||
if (string::size_type(utf8::unchecked::distance(line_start, line_end)) != char_count)
|
||||
cout << "Line " << line_count << ": Error in unchecked::distance function" << '\n';
|
||||
|
||||
while (it != line_start) {
|
||||
unchecked::previous(it);
|
||||
char_count--;
|
||||
}
|
||||
if (char_count != 0)
|
||||
cout << "Line " << line_count << ": Error in iterating with unchecked::previous - wrong number of characters" << '\n';
|
||||
|
||||
// Try utf8::unchecked::iterator
|
||||
utf8::unchecked::iterator<string::iterator> un_u8it(line_start);
|
||||
if (!utf32_line.empty() && *un_u8it != utf32_line.at(0))
|
||||
cout << "Line " << line_count << ": Error in utf::unchecked::iterator * operator" << '\n';
|
||||
if (std::distance(un_u8it, utf8::unchecked::iterator<string::iterator>(line_end)) != static_cast<int>(utf32_line.size()))
|
||||
cout << "Line " << line_count << ": Error in using utf::unchecked::iterator with std::distance - wrong number of characters" << '\n';
|
||||
|
||||
std::advance(un_u8it, utf32_line.size());
|
||||
if (un_u8it != utf8::unchecked::iterator<string::iterator>(line_end))
|
||||
cout << "Line " << line_count << ": Error in using utf::unchecked::iterator with std::advance" << '\n';
|
||||
}
|
||||
}
|
Loading…
Add table
Reference in a new issue