diff --git a/test_drivers/utf8reader/utf8reader.cpp b/test_drivers/utf8reader/utf8reader.cpp
new file mode 100644
index 0000000..e575f39
--- /dev/null
+++ b/test_drivers/utf8reader/utf8reader.cpp
@@ -0,0 +1,64 @@
+// Test driver: reads a UTF-8 text file line by line, reports lines that contain
+// invalid UTF-8, and writes the valid prefix of each line as UTF-16 to a file
+// named "<input path>utf16.txt".
+#include "../../source/utf8.h"
+using namespace utf8;
+
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <string>
+#include <vector>
+using namespace std;
+
+int main(int argc, char** argv)
+{
+    // NOTE(review): every early exit below returns 0, so a caller cannot detect
+    // failure from the exit status.
+    if (argc != 2) {
+        cout << "\nUsage: utfreader filename\n";
+        return 0;
+    }
+    const char* TEST_FILE_PATH = argv[1];
+    // Open the test file
+    ifstream fs8(TEST_FILE_PATH);
+    if (!fs8.is_open()) {
+        cout << "Could not open " << TEST_FILE_PATH << endl;
+        return 0;
+    }
+
+    // Create a file to write utf-16 text
+    string utf16_file_name = TEST_FILE_PATH;
+    utf16_file_name += "utf16.txt";
+    ofstream fs16(utf16_file_name.c_str(), ios_base::out | ios_base::binary);
+    if (!fs16.is_open()) {
+        cout << "Could not open utf16.txt" << endl;
+        return 0;
+    }
+    // Begin the output with a byte order mark, written in native byte order
+    const unsigned short utf16_bom = 0xfeff;
+    fs16.write(reinterpret_cast<const char*>(&utf16_bom), sizeof(unsigned short));
+
+    // Read it line by line. getline() fails once the input is exhausted, so a
+    // trailing newline no longer yields a phantom empty last line (which a
+    // `while (!fs8.eof())` loop produces, inflating line_count and the output).
+    unsigned int line_count = 0;
+    string line;
+    while (getline(fs8, line)) {
+        line_count++;
+        // Anything find_invalid() returns short of line.end() is reported as
+        // invalid, with its 0-based offset from the start of the line; only
+        // [line_start, line_end) is converted below.
+        string::iterator line_start = line.begin();
+        string::iterator line_end = find_invalid(line_start, line.end());
+        if (line_end != line.end())
+            cout << "Line " << line_count << ": Invalid utf-8 at byte " << line_end - line_start << '\n';
+
+        // Convert it to utf-16 and write to the file
+        vector<unsigned short> utf16_line;
+        utf8to16(line_start, line_end, back_inserter(utf16_line));
+        utf16_line.push_back('\n');
+        // The push_back above guarantees utf16_line is non-empty, so [0] is valid
+        fs16.write(reinterpret_cast<const char*>(&utf16_line[0]), utf16_line.size() * sizeof (unsigned short));
+    }
+}