diff --git a/v2_0/doc/utf8cpp.html b/v2_0/doc/utf8cpp.html index c97545e..ed6de70 100644 --- a/v2_0/doc/utf8cpp.html +++ b/v2_0/doc/utf8cpp.html @@ -57,6 +57,10 @@
- To illustrate the use of this utf8 library, we shall open a file containing UTF-8
- encoded text, check whether it starts with a byte order mark, read each line into a
- std::string, check it for validity, convert the text to UTF-16, and
- back to UTF-8:
+ To illustrate the use of this utf8 library, let's start with a small but complete program
+ that opens a file containing UTF-8 encoded text, reads it line by line, checks each line
+ for invalid UTF-8 byte sequences, and converts it to UTF-16 encoding and back to UTF-8:
#include <fstream> @@ -135,26 +141,17 @@ return 0; } const char* test_file_path = argv[1]; - // Open the test file (must be UTF-8 encoded) + // Open the test file (contains UTF-8 encoded text) ifstream fs8(test_file_path); if (!fs8.is_open()) { cout << "Could not open " << test_file_path << endl; return 0; } - // Read the first line of the file unsigned line_count = 1; string line; - if (!getline(fs8, line)) - return 0; - // Look for utf-8 byte-order mark at the beginning - if (line.size() > 2) { - if (utf8::is_bom(line.c_str())) - cout << "There is a byte order mark at the beginning of the file\n"; - } // Play with all the lines in the file - do { + while (getline(fs8, line)) { // check for invalid utf-8 (for a simple yes/no check, there is also utf8::is_valid function) string::iterator end_it = utf8::find_invalid(line.begin(), line.end()); if (end_it != line.end()) { @@ -181,11 +178,10 @@ cout << "Error in UTF-16 conversion at line: " << line_count << "\n"; - getline(fs8, line); line_count++; - } while (!fs8.eof()); + } return 0; -} +
In the previous code sample, we have seen the use of the following functions from